Mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git
topology-updater: migrate to structured logging
commit 6e3b181ab4 (parent 7be08f9e7f)
8 changed files with 85 additions and 75 deletions
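The pattern applied throughout: printf-style klog calls (Infof, Warningf, Errorf, Exitf) become structured calls (InfoS, ErrorS) with a constant message and explicit key/value pairs. Before the diff itself, a minimal, self-contained sketch of the conversion (values illustrative, not taken from the commit):

package main

import (
	"errors"

	"k8s.io/klog/v2"
)

func main() {
	klog.InitFlags(nil)
	defer klog.Flush()

	nodeName := "worker-0" // illustrative value

	// Before: message and variables fused into one format string.
	klog.Infof("NodeResourceTopology for node %s has been deleted", nodeName)

	// After: constant message plus key/value pairs that log backends can index.
	klog.InfoS("NodeResourceTopology object has been deleted", "nodeName", nodeName)

	// Errors travel as a dedicated first argument instead of a %v verb.
	err := errors.New("connection refused")
	klog.ErrorS(err, "failed to delete NodeResourceTopology object", "nodeName", nodeName)
}

Because the message stays constant, aggregators can group identical events and filter on keys such as nodeName.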
@@ -47,17 +47,19 @@ func main() {
 	// Assert that the version is known
 	if version.Undefined() {
-		klog.Warningf("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
+		klog.InfoS("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
 	}
 
 	// Get new TopologyGC instance
 	gc, err := nfdtopologygarbagecollector.New(args)
 	if err != nil {
-		klog.Exit(err)
+		klog.ErrorS(err, "failed to initialize topology garbage collector instance")
+		os.Exit(1)
 	}
 
 	if err = gc.Run(); err != nil {
-		klog.Exit(err)
+		klog.ErrorS(err, "error while running")
+		os.Exit(1)
 	}
 }
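One consequence visible in this hunk: klog.Exit has no structured counterpart, so each call is split into klog.ErrorS plus an explicit os.Exit(1). A hypothetical helper capturing the same behavior (fatal is illustrative and not part of the commit, which simply inlines the two calls):

package main

import (
	"fmt"
	"os"

	"k8s.io/klog/v2"
)

// fatal does what klog.Exit(err) did: log the error, flush buffers, terminate.
func fatal(err error, msg string, keysAndValues ...interface{}) {
	klog.ErrorS(err, msg, keysAndValues...)
	klog.Flush()
	os.Exit(1)
}

func main() {
	if err := fmt.Errorf("kubeconfig not found"); err != nil {
		fatal(err, "failed to initialize topology garbage collector instance")
	}
}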
@@ -57,47 +57,28 @@ func main() {
 	// Assert that the version is known
 	if version.Undefined() {
-		klog.Warningf("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
+		klog.InfoS("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
 	}
 
 	// Plug klog into grpc logging infrastructure
 	utils.ConfigureGrpcKlog()
 
-	u, err := url.ParseRequestURI(resourcemonitorArgs.KubeletConfigURI)
+	klConfig, err := getKubeletConfig(resourcemonitorArgs.KubeletConfigURI, resourcemonitorArgs.APIAuthTokenFile)
 	if err != nil {
-		klog.Exitf("failed to parse args for kubelet-config-uri: %v", err)
-	}
-
-	// init kubelet API client
-	var klConfig *kubeletconfigv1beta1.KubeletConfiguration
-	switch u.Scheme {
-	case "file":
-		klConfig, err = kubeconf.GetKubeletConfigFromLocalFile(u.Path)
-		if err != nil {
-			klog.Exitf("failed to read kubelet config: %v", err)
-		}
-	case "https":
-		restConfig, err := kubeconf.InsecureConfig(u.String(), resourcemonitorArgs.APIAuthTokenFile)
-		if err != nil {
-			klog.Exitf("failed to initialize rest config for kubelet config uri: %v", err)
-		}
-
-		klConfig, err = kubeconf.GetKubeletConfiguration(restConfig)
-		if err != nil {
-			klog.Exitf("failed to get kubelet config from configz endpoint: %v", err)
-		}
-	default:
-		klog.Exitf("unsupported URI scheme: %v", u.Scheme)
+		klog.ErrorS(err, "failed to get kubelet configuration")
+		os.Exit(1)
 	}
 
 	// Get new TopologyUpdater instance
 	instance, err := topology.NewTopologyUpdater(*args, *resourcemonitorArgs, klConfig.TopologyManagerPolicy, klConfig.TopologyManagerScope)
 	if err != nil {
-		klog.Exit(err)
+		klog.ErrorS(err, "failed to initialize topology updater instance")
+		os.Exit(1)
 	}
 
 	if err = instance.Run(); err != nil {
-		klog.Exit(err)
+		klog.ErrorS(err, "error while running")
+		os.Exit(1)
 	}
 }
@@ -153,3 +134,34 @@ func initFlags(flagset *flag.FlagSet) (*topology.Args, *resourcemonitor.Args) {
 	return args, resourcemonitorArgs
 }
+
+func getKubeletConfig(uri, apiAuthTokenFile string) (*kubeletconfigv1beta1.KubeletConfiguration, error) {
+	u, err := url.ParseRequestURI(uri)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse -kubelet-config-uri: %w", err)
+	}
+
+	// init kubelet API client
+	var klConfig *kubeletconfigv1beta1.KubeletConfiguration
+	switch u.Scheme {
+	case "file":
+		klConfig, err = kubeconf.GetKubeletConfigFromLocalFile(u.Path)
+		if err != nil {
+			return nil, fmt.Errorf("failed to read kubelet config: %w", err)
+		}
+		return klConfig, err
+	case "https":
+		restConfig, err := kubeconf.InsecureConfig(u.String(), apiAuthTokenFile)
+		if err != nil {
+			return nil, fmt.Errorf("failed to initialize rest config for kubelet config uri: %w", err)
+		}
+
+		klConfig, err = kubeconf.GetKubeletConfiguration(restConfig)
+		if err != nil {
+			return nil, fmt.Errorf("failed to get kubelet config from configz endpoint: %w", err)
+		}
+		return klConfig, nil
+	}
+
+	return nil, fmt.Errorf("unsupported URI scheme: %v", u.Scheme)
+}
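The new helper concentrates all kubelet-config retrieval and returns errors instead of exiting, which is what lets main() shrink to the single structured log in the previous hunk. A usage fragment assuming the function above; the URIs and token path are illustrative, not taken from the commit:

	// Local file, handled by the "file" branch:
	klConfig, err := getKubeletConfig("file:///var/lib/kubelet/config.yaml", "")
	if err != nil {
		klog.ErrorS(err, "failed to get kubelet configuration")
		os.Exit(1)
	}

	// Kubelet /configz endpoint, handled by the "https" branch:
	// getKubeletConfig("https://127.0.0.1:10250/configz",
	//	"/var/run/secrets/kubernetes.io/serviceaccount/token")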
@@ -85,27 +85,27 @@ func newTopologyGC(config *restclient.Config, stop chan struct{}, gcPeriod time.
 func (n *topologyGC) deleteNRT(nodeName string) {
 	if err := n.topoClient.TopologyV1alpha2().NodeResourceTopologies().Delete(context.TODO(), nodeName, metav1.DeleteOptions{}); err != nil {
 		if errors.IsNotFound(err) {
-			klog.V(2).Infof("NodeResourceTopology for node %s not found, omitting deletion", nodeName)
+			klog.V(2).InfoS("NodeResourceTopology not found, omitting deletion", "nodeName", nodeName)
 			return
 		} else {
-			klog.Warningf("failed to delete NodeResourceTopology for node %s: %s", nodeName, err.Error())
+			klog.ErrorS(err, "failed to delete NodeResourceTopology object", "nodeName", nodeName)
 			return
 		}
 	}
-	klog.Infof("NodeResourceTopology for node %s has been deleted", nodeName)
+	klog.InfoS("NodeResourceTopology object has been deleted", "nodeName", nodeName)
 }
 
 func (n *topologyGC) deleteNodeHandler(object interface{}) {
 	// handle a case when we are starting up and need to clear stale NRT resources
 	obj := object
 	if deletedFinalStateUnknown, ok := object.(cache.DeletedFinalStateUnknown); ok {
-		klog.V(2).Infof("found stale NodeResourceTopology for node: %s ", object)
+		klog.V(2).InfoS("found stale NodeResourceTopology object", "object", object)
 		obj = deletedFinalStateUnknown.Obj
 	}
 
 	node, ok := obj.(*corev1.Node)
 	if !ok {
-		klog.Errorf("cannot convert %v to v1.Node", object)
+		klog.InfoS("cannot convert object to v1.Node", "object", object)
 		return
 	}
@@ -113,13 +113,13 @@ func (n *topologyGC) deleteNodeHandler(object interface{}) {
 }
 
 func (n *topologyGC) runGC() {
-	klog.Infof("Running GC")
+	klog.InfoS("Running GC")
 	objects := n.factory.Core().V1().Nodes().Informer().GetIndexer().List()
 	nodes := sets.NewString()
 	for _, object := range objects {
 		key, err := cache.MetaNamespaceKeyFunc(object)
 		if err != nil {
-			klog.Warningf("cannot create key for %v: %s", object, err.Error())
+			klog.ErrorS(err, "failed to create key", "object", object)
 			continue
 		}
 		nodes.Insert(key)
@@ -127,14 +127,14 @@ func (n *topologyGC) runGC() {
 
 	nrts, err := n.topoClient.TopologyV1alpha2().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
 	if err != nil {
-		klog.Warningf("cannot list NRTs %s", err.Error())
+		klog.ErrorS(err, "failed to list NodeResourceTopology objects")
 		return
 	}
 
 	for _, nrt := range nrts.Items {
 		key, err := cache.MetaNamespaceKeyFunc(&nrt)
 		if err != nil {
-			klog.Warningf("cannot create key for %v: %s", nrt, err.Error())
+			klog.ErrorS(err, "failed to create key", "noderesourcetopology", klog.KObj(&nrt))
 			continue
 		}
 		if !nodes.Has(key) {
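klog.KObj, introduced above for the NodeResourceTopology key error, formats an API object as its namespace/name pair rather than dumping the whole struct. A minimal, self-contained illustration using a Pod as a stand-in:

package main

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func main() {
	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "web-0"}}
	// Logs: "failed to create key" pod="default/web-0"
	klog.InfoS("failed to create key", "pod", klog.KObj(pod))
}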
@@ -151,7 +151,7 @@ func (n *topologyGC) periodicGC(gcPeriod time.Duration) {
 		case <-gcTrigger.C:
 			n.runGC()
 		case <-n.stopChan:
-			klog.Infof("shutting down periodic Garbage Collector")
+			klog.InfoS("shutting down periodic Garbage Collector")
 			return
 		}
 	}
@@ -76,13 +76,13 @@ func (n *Notifier) Run() {
 	for {
 		select {
 		case <-timeEvents:
-			klog.V(5).Infof("timer update received")
+			klog.V(5).InfoS("timer update received")
 			i := Info{Event: IntervalBased}
 			n.dest <- i
 
 		case e := <-n.fsEvent:
 			basename := path.Base(e.Name)
-			klog.V(5).Infof("fsnotify event from file %q: %q received", basename, e.Op)
+			klog.V(5).InfoS("fsnotify event received", "filename", basename, "op", e.Op)
 			if stateFiles.Has(basename) {
 				i := Info{Event: FSUpdate}
 				n.dest <- i
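The V(5) calls above stay verbosity-gated after the migration: V(n).InfoS emits only when klog runs at verbosity n or higher. A small sketch of how that threshold is set (the flag value is illustrative):

package main

import (
	"flag"

	"k8s.io/klog/v2"
)

func main() {
	klog.InitFlags(nil)
	_ = flag.Set("v", "5") // equivalent to passing -v=5 on the command line
	flag.Parse()

	// Emitted only because verbosity >= 5; silent at the default level.
	klog.V(5).InfoS("timer update received")
}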
@@ -120,8 +120,7 @@ func NewTopologyUpdater(args Args, resourcemonitorArgs resourcemonitor.Args, pol
 // Run nfdTopologyUpdater. Returns if a fatal error is encountered, or, after
 // one request if OneShot is set to 'true' in the updater args.
 func (w *nfdTopologyUpdater) Run() error {
-	klog.Infof("Node Feature Discovery Topology Updater %s", version.Get())
-	klog.Infof("NodeName: '%s'", w.nodeInfo.nodeName)
+	klog.InfoS("Node Feature Discovery Topology Updater", "version", version.Get(), "nodeName", w.nodeInfo.nodeName)
 
 	podResClient, err := podres.GetPodResClient(w.resourcemonitorArgs.PodResourceSocketPath)
 	if err != nil {
@@ -158,16 +157,14 @@ func (w *nfdTopologyUpdater) Run() error {
 		return fmt.Errorf("failed to obtain node resource information: %w", err)
 	}
 
-	klog.V(2).Infof("resAggr is: %v\n", resAggr)
-
 	for {
 		select {
 		case info := <-w.eventSource:
-			klog.V(4).Infof("got %q event. scanning...", info.Event)
+			klog.V(4).InfoS("event received, scanning...", "event", info.Event)
 			scanResponse, err := resScan.Scan()
 			utils.KlogDump(1, "podResources are", " ", scanResponse.PodResources)
 			if err != nil {
-				klog.Warningf("scan failed: %v", err)
+				klog.ErrorS(err, "scan failed")
 				continue
 			}
 			zones = resAggr.Aggregate(scanResponse.PodResources)
@@ -183,7 +180,7 @@ func (w *nfdTopologyUpdater) Run() error {
 			}
 
 		case <-w.stop:
-			klog.Infof("shutting down nfd-topology-updater")
+			klog.InfoS("shutting down nfd-topology-updater")
 			return nil
 		}
 	}
@@ -240,7 +237,7 @@ func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneLi
 
 func (w *nfdTopologyUpdater) configure() error {
 	if w.configFilePath == "" {
-		klog.Warningf("file path for nfd-topology-updater conf file is empty")
+		klog.InfoS("no configuration file specified")
 		return nil
 	}
 
@@ -248,7 +245,7 @@ func (w *nfdTopologyUpdater) configure() error {
 	if err != nil {
 		// config is optional
 		if os.IsNotExist(err) {
-			klog.Warningf("couldn't find conf file under %v", w.configFilePath)
+			klog.InfoS("configuration file not found", "path", w.configFilePath)
 			return nil
 		}
 		return err
@@ -258,7 +255,7 @@ func (w *nfdTopologyUpdater) configure() error {
 	if err != nil {
 		return fmt.Errorf("failed to parse configuration file %q: %w", w.configFilePath, err)
 	}
-	klog.Infof("configuration file %q parsed:\n %v", w.configFilePath, w.config)
+	klog.InfoS("configuration file parsed", "path", w.configFilePath, "config", w.config)
 	return nil
 }
 
@@ -27,7 +27,7 @@ func NewExcludeResourceList(resMap map[string][]string, nodeName string) Exclude
 
 func (rl *ExcludeResourceList) IsExcluded(resource corev1.ResourceName) bool {
 	if rl.excludeList.Has(string(resource)) {
-		klog.V(5).InfoS("resource excluded", "resource", resource)
+		klog.V(5).InfoS("resource excluded", "resourceName", resource)
 		return true
 	}
 	return false
@@ -77,9 +77,9 @@ func NewResourcesAggregator(podResourceClient podresourcesapi.PodResourcesLister
 	resp, err := podResourceClient.GetAllocatableResources(ctx, &podresourcesapi.AllocatableResourcesRequest{})
 	if err != nil {
 		if strings.Contains(err.Error(), "API GetAllocatableResources disabled") {
-			klog.Error("Kubelet's pod resources 'GetAllocatableResources' functionality is disabled. " +
-				"Ensure feature flag 'KubeletPodResourcesGetAllocatable' is set to true. " +
-				"You can find more about the feature gates from the following URL - " +
+			klog.ErrorS(err, "Kubelet's pod resources 'GetAllocatableResources' functionality is disabled. "+
+				"Ensure feature flag 'KubeletPodResourcesGetAllocatable' is set to true. "+
+				"You can find more about the feature gates from the following URL - "+
 				"https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/")
 		}
 
@@ -178,7 +178,7 @@ func (noderesourceData *nodeResources) Aggregate(podResData []PodResources) topo
 
 		costs, err := makeCostsPerNumaNode(noderesourceData.topo.Nodes, nodeID)
 		if err != nil {
-			klog.Infof("cannot find costs for NUMA node %d: %v", nodeID, err)
+			klog.ErrorS(err, "failed to calculate costs for NUMA node", "nodeID", nodeID)
 		} else {
 			zone.Costs = costs
 		}
@@ -212,7 +212,7 @@ func getContainerDevicesFromAllocatableResources(availRes *podresourcesapi.Alloc
 	for _, cpuID := range availRes.GetCpuIds() {
 		nodeID, ok := cpuIDToNodeIDMap[int(cpuID)]
 		if !ok {
-			klog.Infof("cannot find the NUMA node for CPU %d", cpuID)
+			klog.InfoS("failed to find the NUMA node for CPU", "cpuID", cpuID)
 			continue
 		}
 
@@ -243,16 +243,16 @@ func (noderesourceData *nodeResources) updateAvailable(numaData map[int]map[core
 		resName := string(ri.Name)
 		resMap, ok := noderesourceData.resourceID2NUMAID[resName]
 		if !ok {
-			klog.Infof("unknown resource %q", ri.Name)
+			klog.InfoS("unknown resource", "resourceName", ri.Name)
 			continue
 		}
 		nodeID, ok := resMap[resID]
 		if !ok {
-			klog.Infof("unknown resource %q: %q", resName, resID)
+			klog.InfoS("unknown resource", "resourceName", resName, "resourceID", resID)
 			continue
 		}
 		if _, ok := numaData[nodeID]; !ok {
-			klog.Infof("unknown node id: %q", nodeID)
+			klog.InfoS("unknown NUMA node id", "numaID", nodeID)
 			continue
 		}
 
@@ -409,18 +409,18 @@ func getCPUs(devices []*podresourcesapi.ContainerDevices) map[string]int {
 // This function assumes the available resources are initialized to be equal to the capacity.
 func (noderesourceData *nodeResources) updateMemoryAvailable(numaData map[int]map[corev1.ResourceName]*resourceData, ri ResourceInfo) {
 	if len(ri.NumaNodeIds) == 0 {
-		klog.Warningf("no NUMA nodes information is available for device %q", ri.Name)
+		klog.InfoS("no NUMA nodes information is available", "resourceName", ri.Name)
 		return
 	}
 
 	if len(ri.Data) != 1 {
-		klog.Warningf("no size information is available for the device %q", ri.Name)
+		klog.InfoS("no size information is available", "resourceName", ri.Name)
 		return
 	}
 
 	requestedSize, err := strconv.ParseInt(ri.Data[0], 10, 64)
 	if err != nil {
-		klog.Errorf("failed to parse resource requested size: %v", err)
+		klog.ErrorS(err, "failed to parse resource requested size")
 		return
 	}
 
@@ -430,17 +430,17 @@ func (noderesourceData *nodeResources) updateMemoryAvailable(numaData map[int]ma
 		}
 
 		if _, ok := numaData[numaNodeID]; !ok {
-			klog.Warningf("failed to find NUMA node ID %d under the node topology", numaNodeID)
+			klog.InfoS("failed to find NUMA node ID under the node topology", "numaID", numaNodeID)
 			continue
 		}
 
 		if _, ok := numaData[numaNodeID][ri.Name]; !ok {
-			klog.Warningf("failed to find resource %q under the node topology", ri.Name)
+			klog.InfoS("failed to find resource under the node topology", "resourceName", ri.Name)
 			return
 		}
 
 		if numaData[numaNodeID][ri.Name].available == 0 {
-			klog.V(4).Infof("no available memory on the node %d for the resource %q", numaNodeID, ri.Name)
+			klog.V(4).InfoS("no available memory", "numaID", numaNodeID, "resourceName", ri.Name)
 			continue
 		}
 
@@ -461,7 +461,7 @@ func (noderesourceData *nodeResources) updateMemoryAvailable(numaData map[int]ma
 	}
 
 	if requestedSize > 0 {
-		klog.Warningf("the resource %q requested size was not fully satisfied by NUMA nodes", ri.Name)
+		klog.InfoS("requested size was not fully satisfied by NUMA nodes", "resourceName", ri.Name)
 	}
 }
 
@@ -48,9 +48,9 @@ func NewPodResourcesScanner(namespace string, podResourceClient podresourcesapi.
 		podFingerprint: podFingerprint,
 	}
 	if resourcemonitorInstance.namespace != "*" {
-		klog.Infof("watching namespace %q", resourcemonitorInstance.namespace)
+		klog.InfoS("watching one namespace", "namespace", resourcemonitorInstance.namespace)
 	} else {
-		klog.Infof("watching all namespaces")
+		klog.InfoS("watching all namespaces")
 	}
 
 	return resourcemonitorInstance, nil
@@ -67,7 +67,6 @@ func (resMon *PodResourcesScanner) isWatchable(podNamespace string, podName stri
 		return false, false, err
 	}
 
-	klog.Infof("podresource: %s", podName)
 	isIntegralGuaranteed := hasExclusiveCPUs(pod)
 
 	if resMon.namespace == "*" && (isIntegralGuaranteed || hasDevice) {
@@ -137,9 +136,9 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) {
 	var status podfingerprint.Status
 	podFingerprintSign, err := computePodFingerprint(respPodResources, &status)
 	if err != nil {
-		klog.Errorf("podFingerprint: Unable to compute fingerprint %v", err)
+		klog.ErrorS(err, "failed to calculate fingerprint")
 	} else {
-		klog.Info("podFingerprint: " + status.Repr())
+		klog.InfoS("podFingerprint calculated", "status", status.Repr())
 
 		retVal.Attributes = append(retVal.Attributes, v1alpha2.AttributeInfo{
 			Name: podfingerprint.Attribute,
@@ -150,7 +149,7 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) {
 	var podResData []PodResources
 
 	for _, podResource := range respPodResources {
-		klog.Infof("podresource iter: %s", podResource.GetName())
+		klog.InfoS("scanning pod", "podName", podResource.GetName())
 		hasDevice := hasDevice(podResource)
 		isWatchable, isIntegralGuaranteed, err := resMon.isWatchable(podResource.GetNamespace(), podResource.GetName(), hasDevice)
 		if err != nil {
@@ -233,7 +232,7 @@ func hasDevice(podResource *podresourcesapi.PodResources) bool {
 			return true
 		}
 	}
-	klog.Infof("pod:%s doesn't have devices", podResource.GetName())
+	klog.InfoS("pod doesn't have devices", "podName", podResource.GetName())
 	return false
 }