From 6e3b181ab440bc76214e47c8f9f7e20b8464d655 Mon Sep 17 00:00:00 2001 From: Markus Lehtonen Date: Wed, 3 May 2023 11:32:53 +0300 Subject: [PATCH] topology-updater: migrate to structured logging --- cmd/nfd-topology-gc/main.go | 8 ++- cmd/nfd-topology-updater/main.go | 66 +++++++++++-------- pkg/nfd-topology-gc/nfd-nrt-gc.go | 20 +++--- .../kubeletnotifier/kubeletnotifier.go | 4 +- .../nfd-topology-updater.go | 17 ++--- pkg/resourcemonitor/excludelist.go | 2 +- .../noderesourcesaggregator.go | 30 ++++----- pkg/resourcemonitor/podresourcesscanner.go | 13 ++-- 8 files changed, 85 insertions(+), 75 deletions(-) diff --git a/cmd/nfd-topology-gc/main.go b/cmd/nfd-topology-gc/main.go index ac66bb3b0..99626a489 100644 --- a/cmd/nfd-topology-gc/main.go +++ b/cmd/nfd-topology-gc/main.go @@ -47,17 +47,19 @@ func main() { // Assert that the version is known if version.Undefined() { - klog.Warningf("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.") + klog.InfoS("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.") } // Get new TopologyGC instance gc, err := nfdtopologygarbagecollector.New(args) if err != nil { - klog.Exit(err) + klog.ErrorS(err, "failed to initialize topology garbage collector instance") + os.Exit(1) } if err = gc.Run(); err != nil { - klog.Exit(err) + klog.ErrorS(err, "error while running") + os.Exit(1) } } diff --git a/cmd/nfd-topology-updater/main.go b/cmd/nfd-topology-updater/main.go index d9627e3de..b85732520 100644 --- a/cmd/nfd-topology-updater/main.go +++ b/cmd/nfd-topology-updater/main.go @@ -57,47 +57,28 @@ func main() { // Assert that the version is known if version.Undefined() { - klog.Warningf("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.") + klog.InfoS("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.") } // Plug klog into grpc logging infrastructure utils.ConfigureGrpcKlog() - u, err := url.ParseRequestURI(resourcemonitorArgs.KubeletConfigURI) + klConfig, err := getKubeletConfig(resourcemonitorArgs.KubeletConfigURI, resourcemonitorArgs.APIAuthTokenFile) if err != nil { - klog.Exitf("failed to parse args for kubelet-config-uri: %v", err) - } - - // init kubelet API client - var klConfig *kubeletconfigv1beta1.KubeletConfiguration - switch u.Scheme { - case "file": - klConfig, err = kubeconf.GetKubeletConfigFromLocalFile(u.Path) - if err != nil { - klog.Exitf("failed to read kubelet config: %v", err) - } - case "https": - restConfig, err := kubeconf.InsecureConfig(u.String(), resourcemonitorArgs.APIAuthTokenFile) - if err != nil { - klog.Exitf("failed to initialize rest config for kubelet config uri: %v", err) - } - - klConfig, err = kubeconf.GetKubeletConfiguration(restConfig) - if err != nil { - klog.Exitf("failed to get kubelet config from configz endpoint: %v", err) - } - default: - klog.Exitf("unsupported URI scheme: %v", u.Scheme) + klog.ErrorS(err, "failed to get kubelet configuration") + os.Exit(1) } // Get new TopologyUpdater instance instance, err := topology.NewTopologyUpdater(*args, *resourcemonitorArgs, klConfig.TopologyManagerPolicy, klConfig.TopologyManagerScope) if err != nil { - klog.Exit(err) + klog.ErrorS(err, "failed to initialize topology updater instance") + os.Exit(1) } if err = instance.Run(); err != nil { - klog.Exit(err) + klog.ErrorS(err, "error while running") + os.Exit(1) } } @@ -153,3 +134,34 @@ func initFlags(flagset *flag.FlagSet) (*topology.Args, *resourcemonitor.Args) { return args, resourcemonitorArgs } + +func getKubeletConfig(uri, apiAuthTokenFile string) (*kubeletconfigv1beta1.KubeletConfiguration, error) { + u, err := url.ParseRequestURI(uri) + if err != nil { + return nil, fmt.Errorf("failed to parse -kubelet-config-uri: %w", err) + } + + // init kubelet API client + var klConfig *kubeletconfigv1beta1.KubeletConfiguration + switch u.Scheme { + case "file": + klConfig, err = kubeconf.GetKubeletConfigFromLocalFile(u.Path) + if err != nil { + return nil, fmt.Errorf("failed to read kubelet config: %w", err) + } + return klConfig, err + case "https": + restConfig, err := kubeconf.InsecureConfig(u.String(), apiAuthTokenFile) + if err != nil { + return nil, fmt.Errorf("failed to initialize rest config for kubelet config uri: %w", err) + } + + klConfig, err = kubeconf.GetKubeletConfiguration(restConfig) + if err != nil { + return nil, fmt.Errorf("failed to get kubelet config from configz endpoint: %w", err) + } + return klConfig, nil + } + + return nil, fmt.Errorf("unsupported URI scheme: %v", u.Scheme) +} diff --git a/pkg/nfd-topology-gc/nfd-nrt-gc.go b/pkg/nfd-topology-gc/nfd-nrt-gc.go index 744d141d0..86fbc313a 100644 --- a/pkg/nfd-topology-gc/nfd-nrt-gc.go +++ b/pkg/nfd-topology-gc/nfd-nrt-gc.go @@ -85,27 +85,27 @@ func newTopologyGC(config *restclient.Config, stop chan struct{}, gcPeriod time. func (n *topologyGC) deleteNRT(nodeName string) { if err := n.topoClient.TopologyV1alpha2().NodeResourceTopologies().Delete(context.TODO(), nodeName, metav1.DeleteOptions{}); err != nil { if errors.IsNotFound(err) { - klog.V(2).Infof("NodeResourceTopology for node %s not found, omitting deletion", nodeName) + klog.V(2).InfoS("NodeResourceTopology not found, omitting deletion", "nodeName", nodeName) return } else { - klog.Warningf("failed to delete NodeResourceTopology for node %s: %s", nodeName, err.Error()) + klog.ErrorS(err, "failed to delete NodeResourceTopology object", "nodeName", nodeName) return } } - klog.Infof("NodeResourceTopology for node %s has been deleted", nodeName) + klog.InfoS("NodeResourceTopology object has been deleted", "nodeName", nodeName) } func (n *topologyGC) deleteNodeHandler(object interface{}) { // handle a case when we are starting up and need to clear stale NRT resources obj := object if deletedFinalStateUnknown, ok := object.(cache.DeletedFinalStateUnknown); ok { - klog.V(2).Infof("found stale NodeResourceTopology for node: %s ", object) + klog.V(2).InfoS("found stale NodeResourceTopology object", "object", object) obj = deletedFinalStateUnknown.Obj } node, ok := obj.(*corev1.Node) if !ok { - klog.Errorf("cannot convert %v to v1.Node", object) + klog.InfoS("cannot convert object to v1.Node", "object", object) return } @@ -113,13 +113,13 @@ func (n *topologyGC) deleteNodeHandler(object interface{}) { } func (n *topologyGC) runGC() { - klog.Infof("Running GC") + klog.InfoS("Running GC") objects := n.factory.Core().V1().Nodes().Informer().GetIndexer().List() nodes := sets.NewString() for _, object := range objects { key, err := cache.MetaNamespaceKeyFunc(object) if err != nil { - klog.Warningf("cannot create key for %v: %s", object, err.Error()) + klog.ErrorS(err, "failed to create key", "object", object) continue } nodes.Insert(key) @@ -127,14 +127,14 @@ func (n *topologyGC) runGC() { nrts, err := n.topoClient.TopologyV1alpha2().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{}) if err != nil { - klog.Warningf("cannot list NRTs %s", err.Error()) + klog.ErrorS(err, "failed to list NodeResourceTopology objects") return } for _, nrt := range nrts.Items { key, err := cache.MetaNamespaceKeyFunc(&nrt) if err != nil { - klog.Warningf("cannot create key for %v: %s", nrt, err.Error()) + klog.ErrorS(err, "failed to create key", "noderesourcetopology", klog.KObj(&nrt)) continue } if !nodes.Has(key) { @@ -151,7 +151,7 @@ func (n *topologyGC) periodicGC(gcPeriod time.Duration) { case <-gcTrigger.C: n.runGC() case <-n.stopChan: - klog.Infof("shutting down periodic Garbage Collector") + klog.InfoS("shutting down periodic Garbage Collector") return } } diff --git a/pkg/nfd-topology-updater/kubeletnotifier/kubeletnotifier.go b/pkg/nfd-topology-updater/kubeletnotifier/kubeletnotifier.go index 3d653b98b..a1b0bffad 100644 --- a/pkg/nfd-topology-updater/kubeletnotifier/kubeletnotifier.go +++ b/pkg/nfd-topology-updater/kubeletnotifier/kubeletnotifier.go @@ -76,13 +76,13 @@ func (n *Notifier) Run() { for { select { case <-timeEvents: - klog.V(5).Infof("timer update received") + klog.V(5).InfoS("timer update received") i := Info{Event: IntervalBased} n.dest <- i case e := <-n.fsEvent: basename := path.Base(e.Name) - klog.V(5).Infof("fsnotify event from file %q: %q received", basename, e.Op) + klog.V(5).InfoS("fsnotify event received", "filename", basename, "op", e.Op) if stateFiles.Has(basename) { i := Info{Event: FSUpdate} n.dest <- i diff --git a/pkg/nfd-topology-updater/nfd-topology-updater.go b/pkg/nfd-topology-updater/nfd-topology-updater.go index 5e9b10825..79af32367 100644 --- a/pkg/nfd-topology-updater/nfd-topology-updater.go +++ b/pkg/nfd-topology-updater/nfd-topology-updater.go @@ -120,8 +120,7 @@ func NewTopologyUpdater(args Args, resourcemonitorArgs resourcemonitor.Args, pol // Run nfdTopologyUpdater. Returns if a fatal error is encountered, or, after // one request if OneShot is set to 'true' in the updater args. func (w *nfdTopologyUpdater) Run() error { - klog.Infof("Node Feature Discovery Topology Updater %s", version.Get()) - klog.Infof("NodeName: '%s'", w.nodeInfo.nodeName) + klog.InfoS("Node Feature Discovery Topology Updater", "version", version.Get(), "nodeName", w.nodeInfo.nodeName) podResClient, err := podres.GetPodResClient(w.resourcemonitorArgs.PodResourceSocketPath) if err != nil { @@ -158,16 +157,14 @@ func (w *nfdTopologyUpdater) Run() error { return fmt.Errorf("failed to obtain node resource information: %w", err) } - klog.V(2).Infof("resAggr is: %v\n", resAggr) - for { select { case info := <-w.eventSource: - klog.V(4).Infof("got %q event. scanning...", info.Event) + klog.V(4).InfoS("event received, scanning...", "event", info.Event) scanResponse, err := resScan.Scan() utils.KlogDump(1, "podResources are", " ", scanResponse.PodResources) if err != nil { - klog.Warningf("scan failed: %v", err) + klog.ErrorS(err, "scan failed") continue } zones = resAggr.Aggregate(scanResponse.PodResources) @@ -183,7 +180,7 @@ func (w *nfdTopologyUpdater) Run() error { } case <-w.stop: - klog.Infof("shutting down nfd-topology-updater") + klog.InfoS("shutting down nfd-topology-updater") return nil } } @@ -240,7 +237,7 @@ func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneLi func (w *nfdTopologyUpdater) configure() error { if w.configFilePath == "" { - klog.Warningf("file path for nfd-topology-updater conf file is empty") + klog.InfoS("no configuration file specified") return nil } @@ -248,7 +245,7 @@ func (w *nfdTopologyUpdater) configure() error { if err != nil { // config is optional if os.IsNotExist(err) { - klog.Warningf("couldn't find conf file under %v", w.configFilePath) + klog.InfoS("configuration file not found", "path", w.configFilePath) return nil } return err @@ -258,7 +255,7 @@ func (w *nfdTopologyUpdater) configure() error { if err != nil { return fmt.Errorf("failed to parse configuration file %q: %w", w.configFilePath, err) } - klog.Infof("configuration file %q parsed:\n %v", w.configFilePath, w.config) + klog.InfoS("configuration file parsed", "path", w.configFilePath, "config", w.config) return nil } diff --git a/pkg/resourcemonitor/excludelist.go b/pkg/resourcemonitor/excludelist.go index 590d0bb8a..8411594a4 100644 --- a/pkg/resourcemonitor/excludelist.go +++ b/pkg/resourcemonitor/excludelist.go @@ -27,7 +27,7 @@ func NewExcludeResourceList(resMap map[string][]string, nodeName string) Exclude func (rl *ExcludeResourceList) IsExcluded(resource corev1.ResourceName) bool { if rl.excludeList.Has(string(resource)) { - klog.V(5).InfoS("resource excluded", "resource", resource) + klog.V(5).InfoS("resource excluded", "resourceName", resource) return true } return false diff --git a/pkg/resourcemonitor/noderesourcesaggregator.go b/pkg/resourcemonitor/noderesourcesaggregator.go index be3865a03..0b92ddf75 100644 --- a/pkg/resourcemonitor/noderesourcesaggregator.go +++ b/pkg/resourcemonitor/noderesourcesaggregator.go @@ -77,9 +77,9 @@ func NewResourcesAggregator(podResourceClient podresourcesapi.PodResourcesLister resp, err := podResourceClient.GetAllocatableResources(ctx, &podresourcesapi.AllocatableResourcesRequest{}) if err != nil { if strings.Contains(err.Error(), "API GetAllocatableResources disabled") { - klog.Error("Kubelet's pod resources 'GetAllocatableResources' functionality is disabled. " + - "Ensure feature flag 'KubeletPodResourcesGetAllocatable' is set to true. " + - "You can find more about the feature gates from the following URL - " + + klog.ErrorS(err, "Kubelet's pod resources 'GetAllocatableResources' functionality is disabled. "+ + "Ensure feature flag 'KubeletPodResourcesGetAllocatable' is set to true. "+ + "You can find more about the feature gates from the following URL - "+ "https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/") } @@ -178,7 +178,7 @@ func (noderesourceData *nodeResources) Aggregate(podResData []PodResources) topo costs, err := makeCostsPerNumaNode(noderesourceData.topo.Nodes, nodeID) if err != nil { - klog.Infof("cannot find costs for NUMA node %d: %v", nodeID, err) + klog.ErrorS(err, "failed to calculate costs for NUMA node", "nodeID", nodeID) } else { zone.Costs = costs } @@ -212,7 +212,7 @@ func getContainerDevicesFromAllocatableResources(availRes *podresourcesapi.Alloc for _, cpuID := range availRes.GetCpuIds() { nodeID, ok := cpuIDToNodeIDMap[int(cpuID)] if !ok { - klog.Infof("cannot find the NUMA node for CPU %d", cpuID) + klog.InfoS("failed to find the NUMA node for CPU", "cpuID", cpuID) continue } @@ -243,16 +243,16 @@ func (noderesourceData *nodeResources) updateAvailable(numaData map[int]map[core resName := string(ri.Name) resMap, ok := noderesourceData.resourceID2NUMAID[resName] if !ok { - klog.Infof("unknown resource %q", ri.Name) + klog.InfoS("unknown resource", "resourceName", ri.Name) continue } nodeID, ok := resMap[resID] if !ok { - klog.Infof("unknown resource %q: %q", resName, resID) + klog.InfoS("unknown resource", "resourceName", resName, "resourceID", resID) continue } if _, ok := numaData[nodeID]; !ok { - klog.Infof("unknown node id: %q", nodeID) + klog.InfoS("unknown NUMA node id", "numaID", nodeID) continue } @@ -409,18 +409,18 @@ func getCPUs(devices []*podresourcesapi.ContainerDevices) map[string]int { // This function assumes the available resources are initialized to be equal to the capacity. func (noderesourceData *nodeResources) updateMemoryAvailable(numaData map[int]map[corev1.ResourceName]*resourceData, ri ResourceInfo) { if len(ri.NumaNodeIds) == 0 { - klog.Warningf("no NUMA nodes information is available for device %q", ri.Name) + klog.InfoS("no NUMA nodes information is available", "resourceName", ri.Name) return } if len(ri.Data) != 1 { - klog.Warningf("no size information is available for the device %q", ri.Name) + klog.InfoS("no size information is available", "resourceName", ri.Name) return } requestedSize, err := strconv.ParseInt(ri.Data[0], 10, 64) if err != nil { - klog.Errorf("failed to parse resource requested size: %v", err) + klog.ErrorS(err, "failed to parse resource requested size") return } @@ -430,17 +430,17 @@ func (noderesourceData *nodeResources) updateMemoryAvailable(numaData map[int]ma } if _, ok := numaData[numaNodeID]; !ok { - klog.Warningf("failed to find NUMA node ID %d under the node topology", numaNodeID) + klog.InfoS("failed to find NUMA node ID under the node topology", "numaID", numaNodeID) continue } if _, ok := numaData[numaNodeID][ri.Name]; !ok { - klog.Warningf("failed to find resource %q under the node topology", ri.Name) + klog.InfoS("failed to find resource under the node topology", "resourceName", ri.Name) return } if numaData[numaNodeID][ri.Name].available == 0 { - klog.V(4).Infof("no available memory on the node %d for the resource %q", numaNodeID, ri.Name) + klog.V(4).InfoS("no available memory", "numaID", numaNodeID, "resourceName", ri.Name) continue } @@ -461,7 +461,7 @@ func (noderesourceData *nodeResources) updateMemoryAvailable(numaData map[int]ma } if requestedSize > 0 { - klog.Warningf("the resource %q requested size was not fully satisfied by NUMA nodes", ri.Name) + klog.InfoS("requested size was not fully satisfied by NUMA nodes", "resourceName", ri.Name) } } diff --git a/pkg/resourcemonitor/podresourcesscanner.go b/pkg/resourcemonitor/podresourcesscanner.go index 00775d9ba..5ff086463 100644 --- a/pkg/resourcemonitor/podresourcesscanner.go +++ b/pkg/resourcemonitor/podresourcesscanner.go @@ -48,9 +48,9 @@ func NewPodResourcesScanner(namespace string, podResourceClient podresourcesapi. podFingerprint: podFingerprint, } if resourcemonitorInstance.namespace != "*" { - klog.Infof("watching namespace %q", resourcemonitorInstance.namespace) + klog.InfoS("watching one namespace", "namespace", resourcemonitorInstance.namespace) } else { - klog.Infof("watching all namespaces") + klog.InfoS("watching all namespaces") } return resourcemonitorInstance, nil @@ -67,7 +67,6 @@ func (resMon *PodResourcesScanner) isWatchable(podNamespace string, podName stri return false, false, err } - klog.Infof("podresource: %s", podName) isIntegralGuaranteed := hasExclusiveCPUs(pod) if resMon.namespace == "*" && (isIntegralGuaranteed || hasDevice) { @@ -137,9 +136,9 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) { var status podfingerprint.Status podFingerprintSign, err := computePodFingerprint(respPodResources, &status) if err != nil { - klog.Errorf("podFingerprint: Unable to compute fingerprint %v", err) + klog.ErrorS(err, "failed to calculate fingerprint") } else { - klog.Info("podFingerprint: " + status.Repr()) + klog.InfoS("podFingerprint calculated", "status", status.Repr()) retVal.Attributes = append(retVal.Attributes, v1alpha2.AttributeInfo{ Name: podfingerprint.Attribute, @@ -150,7 +149,7 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) { var podResData []PodResources for _, podResource := range respPodResources { - klog.Infof("podresource iter: %s", podResource.GetName()) + klog.InfoS("scanning pod", "podName", podResource.GetName()) hasDevice := hasDevice(podResource) isWatchable, isIntegralGuaranteed, err := resMon.isWatchable(podResource.GetNamespace(), podResource.GetName(), hasDevice) if err != nil { @@ -233,7 +232,7 @@ func hasDevice(podResource *podresourcesapi.PodResources) bool { return true } } - klog.Infof("pod:%s doesn't have devices", podResource.GetName()) + klog.InfoS("pod doesn't have devices", "podName", podResource.GetName()) return false }