1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2024-12-14 11:57:51 +00:00

topology-updater: migrate to structured logging

This commit is contained in:
Markus Lehtonen 2023-05-03 11:32:53 +03:00
parent 7be08f9e7f
commit 6e3b181ab4
8 changed files with 85 additions and 75 deletions

View file

@ -47,17 +47,19 @@ func main() {
// Assert that the version is known
if version.Undefined() {
klog.Warningf("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
klog.InfoS("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
}
// Get new TopologyGC instance
gc, err := nfdtopologygarbagecollector.New(args)
if err != nil {
klog.Exit(err)
klog.ErrorS(err, "failed to initialize topology garbage collector instance")
os.Exit(1)
}
if err = gc.Run(); err != nil {
klog.Exit(err)
klog.ErrorS(err, "error while running")
os.Exit(1)
}
}

View file

@ -57,47 +57,28 @@ func main() {
// Assert that the version is known
if version.Undefined() {
klog.Warningf("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
klog.InfoS("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
}
// Plug klog into grpc logging infrastructure
utils.ConfigureGrpcKlog()
u, err := url.ParseRequestURI(resourcemonitorArgs.KubeletConfigURI)
klConfig, err := getKubeletConfig(resourcemonitorArgs.KubeletConfigURI, resourcemonitorArgs.APIAuthTokenFile)
if err != nil {
klog.Exitf("failed to parse args for kubelet-config-uri: %v", err)
}
// init kubelet API client
var klConfig *kubeletconfigv1beta1.KubeletConfiguration
switch u.Scheme {
case "file":
klConfig, err = kubeconf.GetKubeletConfigFromLocalFile(u.Path)
if err != nil {
klog.Exitf("failed to read kubelet config: %v", err)
}
case "https":
restConfig, err := kubeconf.InsecureConfig(u.String(), resourcemonitorArgs.APIAuthTokenFile)
if err != nil {
klog.Exitf("failed to initialize rest config for kubelet config uri: %v", err)
}
klConfig, err = kubeconf.GetKubeletConfiguration(restConfig)
if err != nil {
klog.Exitf("failed to get kubelet config from configz endpoint: %v", err)
}
default:
klog.Exitf("unsupported URI scheme: %v", u.Scheme)
klog.ErrorS(err, "failed to get kubelet configuration")
os.Exit(1)
}
// Get new TopologyUpdater instance
instance, err := topology.NewTopologyUpdater(*args, *resourcemonitorArgs, klConfig.TopologyManagerPolicy, klConfig.TopologyManagerScope)
if err != nil {
klog.Exit(err)
klog.ErrorS(err, "failed to initialize topology updater instance")
os.Exit(1)
}
if err = instance.Run(); err != nil {
klog.Exit(err)
klog.ErrorS(err, "error while running")
os.Exit(1)
}
}
@ -153,3 +134,34 @@ func initFlags(flagset *flag.FlagSet) (*topology.Args, *resourcemonitor.Args) {
return args, resourcemonitorArgs
}
// getKubeletConfig retrieves the kubelet configuration from the given URI.
// Two schemes are supported: "file" reads the configuration from a local
// file, and "https" queries the kubelet configz endpoint, authenticating
// with the bearer token read from apiAuthTokenFile. Any other scheme is
// rejected with an error.
func getKubeletConfig(uri, apiAuthTokenFile string) (*kubeletconfigv1beta1.KubeletConfiguration, error) {
	u, err := url.ParseRequestURI(uri)
	if err != nil {
		return nil, fmt.Errorf("failed to parse -kubelet-config-uri: %w", err)
	}

	switch u.Scheme {
	case "file":
		klConfig, err := kubeconf.GetKubeletConfigFromLocalFile(u.Path)
		if err != nil {
			return nil, fmt.Errorf("failed to read kubelet config: %w", err)
		}
		// err is nil here; return an explicit nil rather than the variable.
		return klConfig, nil
	case "https":
		// Init kubelet API client for the configz endpoint.
		restConfig, err := kubeconf.InsecureConfig(u.String(), apiAuthTokenFile)
		if err != nil {
			return nil, fmt.Errorf("failed to initialize rest config for kubelet config uri: %w", err)
		}
		klConfig, err := kubeconf.GetKubeletConfiguration(restConfig)
		if err != nil {
			return nil, fmt.Errorf("failed to get kubelet config from configz endpoint: %w", err)
		}
		return klConfig, nil
	default:
		// %q makes an empty or whitespace scheme visible in the message.
		return nil, fmt.Errorf("unsupported URI scheme: %q", u.Scheme)
	}
}

View file

@ -85,27 +85,27 @@ func newTopologyGC(config *restclient.Config, stop chan struct{}, gcPeriod time.
func (n *topologyGC) deleteNRT(nodeName string) {
if err := n.topoClient.TopologyV1alpha2().NodeResourceTopologies().Delete(context.TODO(), nodeName, metav1.DeleteOptions{}); err != nil {
if errors.IsNotFound(err) {
klog.V(2).Infof("NodeResourceTopology for node %s not found, omitting deletion", nodeName)
klog.V(2).InfoS("NodeResourceTopology not found, omitting deletion", "nodeName", nodeName)
return
} else {
klog.Warningf("failed to delete NodeResourceTopology for node %s: %s", nodeName, err.Error())
klog.ErrorS(err, "failed to delete NodeResourceTopology object", "nodeName", nodeName)
return
}
}
klog.Infof("NodeResourceTopology for node %s has been deleted", nodeName)
klog.InfoS("NodeResourceTopology object has been deleted", "nodeName", nodeName)
}
func (n *topologyGC) deleteNodeHandler(object interface{}) {
// handle a case when we are starting up and need to clear stale NRT resources
obj := object
if deletedFinalStateUnknown, ok := object.(cache.DeletedFinalStateUnknown); ok {
klog.V(2).Infof("found stale NodeResourceTopology for node: %s ", object)
klog.V(2).InfoS("found stale NodeResourceTopology object", "object", object)
obj = deletedFinalStateUnknown.Obj
}
node, ok := obj.(*corev1.Node)
if !ok {
klog.Errorf("cannot convert %v to v1.Node", object)
klog.InfoS("cannot convert object to v1.Node", "object", object)
return
}
@ -113,13 +113,13 @@ func (n *topologyGC) deleteNodeHandler(object interface{}) {
}
func (n *topologyGC) runGC() {
klog.Infof("Running GC")
klog.InfoS("Running GC")
objects := n.factory.Core().V1().Nodes().Informer().GetIndexer().List()
nodes := sets.NewString()
for _, object := range objects {
key, err := cache.MetaNamespaceKeyFunc(object)
if err != nil {
klog.Warningf("cannot create key for %v: %s", object, err.Error())
klog.ErrorS(err, "failed to create key", "object", object)
continue
}
nodes.Insert(key)
@ -127,14 +127,14 @@ func (n *topologyGC) runGC() {
nrts, err := n.topoClient.TopologyV1alpha2().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
if err != nil {
klog.Warningf("cannot list NRTs %s", err.Error())
klog.ErrorS(err, "failed to list NodeResourceTopology objects")
return
}
for _, nrt := range nrts.Items {
key, err := cache.MetaNamespaceKeyFunc(&nrt)
if err != nil {
klog.Warningf("cannot create key for %v: %s", nrt, err.Error())
klog.ErrorS(err, "failed to create key", "noderesourcetopology", klog.KObj(&nrt))
continue
}
if !nodes.Has(key) {
@ -151,7 +151,7 @@ func (n *topologyGC) periodicGC(gcPeriod time.Duration) {
case <-gcTrigger.C:
n.runGC()
case <-n.stopChan:
klog.Infof("shutting down periodic Garbage Collector")
klog.InfoS("shutting down periodic Garbage Collector")
return
}
}

View file

@ -76,13 +76,13 @@ func (n *Notifier) Run() {
for {
select {
case <-timeEvents:
klog.V(5).Infof("timer update received")
klog.V(5).InfoS("timer update received")
i := Info{Event: IntervalBased}
n.dest <- i
case e := <-n.fsEvent:
basename := path.Base(e.Name)
klog.V(5).Infof("fsnotify event from file %q: %q received", basename, e.Op)
klog.V(5).InfoS("fsnotify event received", "filename", basename, "op", e.Op)
if stateFiles.Has(basename) {
i := Info{Event: FSUpdate}
n.dest <- i

View file

@ -120,8 +120,7 @@ func NewTopologyUpdater(args Args, resourcemonitorArgs resourcemonitor.Args, pol
// Run nfdTopologyUpdater. Returns if a fatal error is encountered, or, after
// one request if OneShot is set to 'true' in the updater args.
func (w *nfdTopologyUpdater) Run() error {
klog.Infof("Node Feature Discovery Topology Updater %s", version.Get())
klog.Infof("NodeName: '%s'", w.nodeInfo.nodeName)
klog.InfoS("Node Feature Discovery Topology Updater", "version", version.Get(), "nodeName", w.nodeInfo.nodeName)
podResClient, err := podres.GetPodResClient(w.resourcemonitorArgs.PodResourceSocketPath)
if err != nil {
@ -158,16 +157,14 @@ func (w *nfdTopologyUpdater) Run() error {
return fmt.Errorf("failed to obtain node resource information: %w", err)
}
klog.V(2).Infof("resAggr is: %v\n", resAggr)
for {
select {
case info := <-w.eventSource:
klog.V(4).Infof("got %q event. scanning...", info.Event)
klog.V(4).InfoS("event received, scanning...", "event", info.Event)
scanResponse, err := resScan.Scan()
utils.KlogDump(1, "podResources are", " ", scanResponse.PodResources)
if err != nil {
klog.Warningf("scan failed: %v", err)
klog.ErrorS(err, "scan failed")
continue
}
zones = resAggr.Aggregate(scanResponse.PodResources)
@ -183,7 +180,7 @@ func (w *nfdTopologyUpdater) Run() error {
}
case <-w.stop:
klog.Infof("shutting down nfd-topology-updater")
klog.InfoS("shutting down nfd-topology-updater")
return nil
}
}
@ -240,7 +237,7 @@ func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneLi
func (w *nfdTopologyUpdater) configure() error {
if w.configFilePath == "" {
klog.Warningf("file path for nfd-topology-updater conf file is empty")
klog.InfoS("no configuration file specified")
return nil
}
@ -248,7 +245,7 @@ func (w *nfdTopologyUpdater) configure() error {
if err != nil {
// config is optional
if os.IsNotExist(err) {
klog.Warningf("couldn't find conf file under %v", w.configFilePath)
klog.InfoS("configuration file not found", "path", w.configFilePath)
return nil
}
return err
@ -258,7 +255,7 @@ func (w *nfdTopologyUpdater) configure() error {
if err != nil {
return fmt.Errorf("failed to parse configuration file %q: %w", w.configFilePath, err)
}
klog.Infof("configuration file %q parsed:\n %v", w.configFilePath, w.config)
klog.InfoS("configuration file parsed", "path", w.configFilePath, "config", w.config)
return nil
}

View file

@ -27,7 +27,7 @@ func NewExcludeResourceList(resMap map[string][]string, nodeName string) Exclude
func (rl *ExcludeResourceList) IsExcluded(resource corev1.ResourceName) bool {
if rl.excludeList.Has(string(resource)) {
klog.V(5).InfoS("resource excluded", "resource", resource)
klog.V(5).InfoS("resource excluded", "resourceName", resource)
return true
}
return false

View file

@ -77,9 +77,9 @@ func NewResourcesAggregator(podResourceClient podresourcesapi.PodResourcesLister
resp, err := podResourceClient.GetAllocatableResources(ctx, &podresourcesapi.AllocatableResourcesRequest{})
if err != nil {
if strings.Contains(err.Error(), "API GetAllocatableResources disabled") {
klog.Error("Kubelet's pod resources 'GetAllocatableResources' functionality is disabled. " +
"Ensure feature flag 'KubeletPodResourcesGetAllocatable' is set to true. " +
"You can find more about the feature gates from the following URL - " +
klog.ErrorS(err, "Kubelet's pod resources 'GetAllocatableResources' functionality is disabled. "+
"Ensure feature flag 'KubeletPodResourcesGetAllocatable' is set to true. "+
"You can find more about the feature gates from the following URL - "+
"https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/")
}
@ -178,7 +178,7 @@ func (noderesourceData *nodeResources) Aggregate(podResData []PodResources) topo
costs, err := makeCostsPerNumaNode(noderesourceData.topo.Nodes, nodeID)
if err != nil {
klog.Infof("cannot find costs for NUMA node %d: %v", nodeID, err)
klog.ErrorS(err, "failed to calculate costs for NUMA node", "nodeID", nodeID)
} else {
zone.Costs = costs
}
@ -212,7 +212,7 @@ func getContainerDevicesFromAllocatableResources(availRes *podresourcesapi.Alloc
for _, cpuID := range availRes.GetCpuIds() {
nodeID, ok := cpuIDToNodeIDMap[int(cpuID)]
if !ok {
klog.Infof("cannot find the NUMA node for CPU %d", cpuID)
klog.InfoS("failed to find the NUMA node for CPU", "cpuID", cpuID)
continue
}
@ -243,16 +243,16 @@ func (noderesourceData *nodeResources) updateAvailable(numaData map[int]map[core
resName := string(ri.Name)
resMap, ok := noderesourceData.resourceID2NUMAID[resName]
if !ok {
klog.Infof("unknown resource %q", ri.Name)
klog.InfoS("unknown resource", "resourceName", ri.Name)
continue
}
nodeID, ok := resMap[resID]
if !ok {
klog.Infof("unknown resource %q: %q", resName, resID)
klog.InfoS("unknown resource", "resourceName", resName, "resourceID", resID)
continue
}
if _, ok := numaData[nodeID]; !ok {
klog.Infof("unknown node id: %q", nodeID)
klog.InfoS("unknown NUMA node id", "numaID", nodeID)
continue
}
@ -409,18 +409,18 @@ func getCPUs(devices []*podresourcesapi.ContainerDevices) map[string]int {
// This function assumes the available resources are initialized to be equal to the capacity.
func (noderesourceData *nodeResources) updateMemoryAvailable(numaData map[int]map[corev1.ResourceName]*resourceData, ri ResourceInfo) {
if len(ri.NumaNodeIds) == 0 {
klog.Warningf("no NUMA nodes information is available for device %q", ri.Name)
klog.InfoS("no NUMA nodes information is available", "resourceName", ri.Name)
return
}
if len(ri.Data) != 1 {
klog.Warningf("no size information is available for the device %q", ri.Name)
klog.InfoS("no size information is available", "resourceName", ri.Name)
return
}
requestedSize, err := strconv.ParseInt(ri.Data[0], 10, 64)
if err != nil {
klog.Errorf("failed to parse resource requested size: %v", err)
klog.ErrorS(err, "failed to parse resource requested size")
return
}
@ -430,17 +430,17 @@ func (noderesourceData *nodeResources) updateMemoryAvailable(numaData map[int]ma
}
if _, ok := numaData[numaNodeID]; !ok {
klog.Warningf("failed to find NUMA node ID %d under the node topology", numaNodeID)
klog.InfoS("failed to find NUMA node ID under the node topology", "numaID", numaNodeID)
continue
}
if _, ok := numaData[numaNodeID][ri.Name]; !ok {
klog.Warningf("failed to find resource %q under the node topology", ri.Name)
klog.InfoS("failed to find resource under the node topology", "resourceName", ri.Name)
return
}
if numaData[numaNodeID][ri.Name].available == 0 {
klog.V(4).Infof("no available memory on the node %d for the resource %q", numaNodeID, ri.Name)
klog.V(4).InfoS("no available memory", "numaID", numaNodeID, "resourceName", ri.Name)
continue
}
@ -461,7 +461,7 @@ func (noderesourceData *nodeResources) updateMemoryAvailable(numaData map[int]ma
}
if requestedSize > 0 {
klog.Warningf("the resource %q requested size was not fully satisfied by NUMA nodes", ri.Name)
klog.InfoS("requested size was not fully satisfied by NUMA nodes", "resourceName", ri.Name)
}
}

View file

@ -48,9 +48,9 @@ func NewPodResourcesScanner(namespace string, podResourceClient podresourcesapi.
podFingerprint: podFingerprint,
}
if resourcemonitorInstance.namespace != "*" {
klog.Infof("watching namespace %q", resourcemonitorInstance.namespace)
klog.InfoS("watching one namespace", "namespace", resourcemonitorInstance.namespace)
} else {
klog.Infof("watching all namespaces")
klog.InfoS("watching all namespaces")
}
return resourcemonitorInstance, nil
@ -67,7 +67,6 @@ func (resMon *PodResourcesScanner) isWatchable(podNamespace string, podName stri
return false, false, err
}
klog.Infof("podresource: %s", podName)
isIntegralGuaranteed := hasExclusiveCPUs(pod)
if resMon.namespace == "*" && (isIntegralGuaranteed || hasDevice) {
@ -137,9 +136,9 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) {
var status podfingerprint.Status
podFingerprintSign, err := computePodFingerprint(respPodResources, &status)
if err != nil {
klog.Errorf("podFingerprint: Unable to compute fingerprint %v", err)
klog.ErrorS(err, "failed to calculate fingerprint")
} else {
klog.Info("podFingerprint: " + status.Repr())
klog.InfoS("podFingerprint calculated", "status", status.Repr())
retVal.Attributes = append(retVal.Attributes, v1alpha2.AttributeInfo{
Name: podfingerprint.Attribute,
@ -150,7 +149,7 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) {
var podResData []PodResources
for _, podResource := range respPodResources {
klog.Infof("podresource iter: %s", podResource.GetName())
klog.InfoS("scanning pod", "podName", podResource.GetName())
hasDevice := hasDevice(podResource)
isWatchable, isIntegralGuaranteed, err := resMon.isWatchable(podResource.GetNamespace(), podResource.GetName(), hasDevice)
if err != nil {
@ -233,7 +232,7 @@ func hasDevice(podResource *podresourcesapi.PodResources) bool {
return true
}
}
klog.Infof("pod:%s doesn't have devices", podResource.GetName())
klog.InfoS("pod doesn't have devices", "podName", podResource.GetName())
return false
}