From 1a687cb2868083df3d5bded594a94b4219b65498 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Luis=20Ojosnegros=20Manch=C3=B3n?= Date: Tue, 7 Feb 2023 12:13:29 +0100 Subject: [PATCH 1/3] topology-updater: Refactor Scan to expand response We are gonna add new data to Scan response so better introduce a new ScanResponse struct as Scan return value to make it easier. --- .../nfd-topology-updater.go | 6 +-- pkg/resourcemonitor/podresourcesscanner.go | 8 +-- .../podresourcesscanner_test.go | 54 +++++++++---------- pkg/resourcemonitor/types.go | 6 ++- 4 files changed, 39 insertions(+), 35 deletions(-) diff --git a/pkg/nfd-topology-updater/nfd-topology-updater.go b/pkg/nfd-topology-updater/nfd-topology-updater.go index 7488680a8..15dfa3582 100644 --- a/pkg/nfd-topology-updater/nfd-topology-updater.go +++ b/pkg/nfd-topology-updater/nfd-topology-updater.go @@ -154,13 +154,13 @@ func (w *nfdTopologyUpdater) Run() error { select { case <-crTrigger.C: klog.Infof("Scanning") - podResources, err := resScan.Scan() - utils.KlogDump(1, "podResources are", " ", podResources) + scanResponse, err := resScan.Scan() + utils.KlogDump(1, "podResources are", " ", scanResponse.PodResources) if err != nil { klog.Warningf("Scan failed: %v", err) continue } - zones = resAggr.Aggregate(podResources) + zones = resAggr.Aggregate(scanResponse.PodResources) utils.KlogDump(1, "After aggregating resources identified zones are", " ", zones) if !w.args.NoPublish { if err = w.updateNodeResourceTopology(zones); err != nil { diff --git a/pkg/resourcemonitor/podresourcesscanner.go b/pkg/resourcemonitor/podresourcesscanner.go index a42801efc..9cfdb4648 100644 --- a/pkg/resourcemonitor/podresourcesscanner.go +++ b/pkg/resourcemonitor/podresourcesscanner.go @@ -113,14 +113,14 @@ func hasIntegralCPUs(pod *corev1.Pod, container *corev1.Container) bool { } // Scan gathers all the PodResources from the system, using the podresources API client. -func (resMon *PodResourcesScanner) Scan() ([]PodResources, error) { +func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) { ctx, cancel := context.WithTimeout(context.Background(), defaultPodResourcesTimeout) defer cancel() // Pod Resource API client resp, err := resMon.podResourceClient.List(ctx, &podresourcesapi.ListPodResourcesRequest{}) if err != nil { - return nil, fmt.Errorf("can't receive response: %v.Get(_) = _, %w", resMon.podResourceClient, err) + return ScanResponse{}, fmt.Errorf("can't receive response: %v.Get(_) = _, %w", resMon.podResourceClient, err) } var podResData []PodResources @@ -130,7 +130,7 @@ func (resMon *PodResourcesScanner) Scan() ([]PodResources, error) { hasDevice := hasDevice(podResource) isWatchable, isIntegralGuaranteed, err := resMon.isWatchable(podResource.GetNamespace(), podResource.GetName(), hasDevice) if err != nil { - return nil, fmt.Errorf("checking if pod in a namespace is watchable, namespace:%v, pod name %v: %v", podResource.GetNamespace(), podResource.GetName(), err) + return ScanResponse{}, fmt.Errorf("checking if pod in a namespace is watchable, namespace:%v, pod name %v: %v", podResource.GetNamespace(), podResource.GetName(), err) } if !isWatchable { continue @@ -198,7 +198,7 @@ func (resMon *PodResourcesScanner) Scan() ([]PodResources, error) { } - return podResData, nil + return ScanResponse{PodResources: podResData}, nil } func hasDevice(podResource *podresourcesapi.PodResources) bool { diff --git a/pkg/resourcemonitor/podresourcesscanner_test.go b/pkg/resourcemonitor/podresourcesscanner_test.go index c8b927364..132372d63 100644 --- a/pkg/resourcemonitor/podresourcesscanner_test.go +++ b/pkg/resourcemonitor/podresourcesscanner_test.go @@ -58,7 +58,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldNotBeNil) }) Convey("Return PodResources should be nil", func() { - So(res, ShouldBeNil) + So(res.PodResources, ShouldBeNil) }) }) @@ -70,7 +70,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should be zero", func() { - So(len(res), ShouldEqual, 0) + So(len(res.PodResources), ShouldEqual, 0) }) }) @@ -160,7 +160,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should have values", func() { - So(len(res), ShouldBeGreaterThan, 0) + So(len(res.PodResources), ShouldBeGreaterThan, 0) expected := []PodResources{ { @@ -194,14 +194,14 @@ func TestPodScanner(t *testing.T) { }, }, } - for _, podresource := range res { + for _, podresource := range res.PodResources { for _, container := range podresource.Containers { - sort.Slice(res, func(i, j int) bool { + sort.Slice(res.PodResources, func(i, j int) bool { return container.Resources[i].Name < container.Resources[j].Name }) } } - So(reflect.DeepEqual(res, expected), ShouldBeTrue) + So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) }) @@ -266,7 +266,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should have values", func() { - So(len(res), ShouldBeGreaterThan, 0) + So(len(res.PodResources), ShouldBeGreaterThan, 0) expected := []PodResources{ { @@ -290,7 +290,7 @@ func TestPodScanner(t *testing.T) { }, } - So(reflect.DeepEqual(res, expected), ShouldBeTrue) + So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) }) @@ -345,7 +345,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should have values", func() { - So(len(res), ShouldBeGreaterThan, 0) + So(len(res.PodResources), ShouldBeGreaterThan, 0) expected := []PodResources{ { @@ -365,7 +365,7 @@ func TestPodScanner(t *testing.T) { }, } - So(reflect.DeepEqual(res, expected), ShouldBeTrue) + So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) }) @@ -427,7 +427,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should have values", func() { - So(len(res), ShouldBeGreaterThan, 0) + So(len(res.PodResources), ShouldBeGreaterThan, 0) expected := []PodResources{ { @@ -447,7 +447,7 @@ func TestPodScanner(t *testing.T) { }, } - So(reflect.DeepEqual(res, expected), ShouldBeTrue) + So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) }) @@ -505,7 +505,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should have values", func() { - So(len(res), ShouldBeGreaterThan, 0) + So(len(res.PodResources), ShouldBeGreaterThan, 0) }) expected := []PodResources{ @@ -526,7 +526,7 @@ func TestPodScanner(t *testing.T) { }, } - So(reflect.DeepEqual(res, expected), ShouldBeTrue) + So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) Convey("When I successfully get valid response for (non-guaranteed) pods with devices with cpus", func() { @@ -589,7 +589,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should have values", func() { - So(len(res), ShouldBeGreaterThan, 0) + So(len(res.PodResources), ShouldBeGreaterThan, 0) }) expected := []PodResources{ @@ -609,7 +609,7 @@ func TestPodScanner(t *testing.T) { }, }, } - So(reflect.DeepEqual(res, expected), ShouldBeTrue) + So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) }) @@ -632,7 +632,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldNotBeNil) }) Convey("Return PodResources should be nil", func() { - So(res, ShouldBeNil) + So(res.PodResources, ShouldBeNil) }) }) @@ -644,7 +644,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should be zero", func() { - So(len(res), ShouldEqual, 0) + So(len(res.PodResources), ShouldEqual, 0) }) }) @@ -712,7 +712,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should be zero", func() { - So(len(res), ShouldEqual, 0) + So(len(res.PodResources), ShouldEqual, 0) }) }) @@ -778,7 +778,7 @@ func TestPodScanner(t *testing.T) { }) Convey("Return PodResources should have values", func() { - So(len(res), ShouldBeGreaterThan, 0) + So(len(res.PodResources), ShouldBeGreaterThan, 0) expected := []PodResources{ { @@ -802,7 +802,7 @@ func TestPodScanner(t *testing.T) { }, } - So(reflect.DeepEqual(res, expected), ShouldBeTrue) + So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) }) @@ -858,7 +858,7 @@ func TestPodScanner(t *testing.T) { }) Convey("Return PodResources should have values", func() { Convey("Return PodResources should be zero", func() { - So(len(res), ShouldEqual, 0) + So(len(res.PodResources), ShouldEqual, 0) }) }) }) @@ -923,7 +923,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should have values", func() { - So(len(res), ShouldEqual, 0) + So(len(res.PodResources), ShouldEqual, 0) }) }) @@ -981,7 +981,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should have values", func() { - So(len(res), ShouldBeGreaterThan, 0) + So(len(res.PodResources), ShouldBeGreaterThan, 0) }) expected := []PodResources{ @@ -1002,7 +1002,7 @@ func TestPodScanner(t *testing.T) { }, } - So(reflect.DeepEqual(res, expected), ShouldBeTrue) + So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) Convey("When I successfully get valid response for (non-guaranteed) pods with devices with cpus", func() { @@ -1065,7 +1065,7 @@ func TestPodScanner(t *testing.T) { So(err, ShouldBeNil) }) Convey("Return PodResources should have values", func() { - So(len(res), ShouldBeGreaterThan, 0) + So(len(res.PodResources), ShouldBeGreaterThan, 0) }) expected := []PodResources{ @@ -1085,7 +1085,7 @@ func TestPodScanner(t *testing.T) { }, }, } - So(reflect.DeepEqual(res, expected), ShouldBeTrue) + So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) }) diff --git a/pkg/resourcemonitor/types.go b/pkg/resourcemonitor/types.go index e079bb612..6385cb251 100644 --- a/pkg/resourcemonitor/types.go +++ b/pkg/resourcemonitor/types.go @@ -53,9 +53,13 @@ type PodResources struct { Containers []ContainerResources } +type ScanResponse struct { + PodResources []PodResources +} + // ResourcesScanner gathers all the PodResources from the system, using the podresources API client type ResourcesScanner interface { - Scan() ([]PodResources, error) + Scan() (ScanResponse, error) } // ResourcesAggregator aggregates resource information based on the received data from underlying hardware and podresource API From b340d112a8703d86ec98b6d6e30b62dedd2b5f3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Luis=20Ojosnegros=20Manch=C3=B3n?= Date: Wed, 1 Feb 2023 12:41:09 +0100 Subject: [PATCH 2/3] topology-updater:compute pod set fingerprint Add an option to compute the fingerprint of the current pod set on each node. Report this new fingerprint using an attribute in NRT object. --- cmd/nfd-topology-updater/main.go | 1 + .../templates/topologyupdater.yaml | 3 + .../helm/node-feature-discovery/values.yaml | 1 + docs/deployment/helm.md | 1 + .../topology-updater-commandline-reference.md | 13 ++ go.mod | 2 + go.sum | 4 + .../nfd-topology-updater.go | 28 ++++- .../nfd-topology-updater_test.go | 112 ++++++++++++++++++ pkg/resourcemonitor/podresourcesscanner.go | 43 ++++++- .../podresourcesscanner_test.go | 68 ++++++++++- pkg/resourcemonitor/types.go | 2 + 12 files changed, 269 insertions(+), 9 deletions(-) create mode 100644 pkg/nfd-topology-updater/nfd-topology-updater_test.go diff --git a/cmd/nfd-topology-updater/main.go b/cmd/nfd-topology-updater/main.go index ba472ebfc..a345beaa8 100644 --- a/cmd/nfd-topology-updater/main.go +++ b/cmd/nfd-topology-updater/main.go @@ -139,6 +139,7 @@ func initFlags(flagset *flag.FlagSet) (*topology.Args, *resourcemonitor.Args) { "Pod Resource Socket path to use.") flagset.StringVar(&args.ConfigFile, "config", "/etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf", "Config file to use.") + flagset.BoolVar(&resourcemonitorArgs.PodSetFingerprint, "pods-fingerprint", false, "Compute and report the pod set fingerprint") klog.InitFlags(flagset) diff --git a/deployment/helm/node-feature-discovery/templates/topologyupdater.yaml b/deployment/helm/node-feature-discovery/templates/topologyupdater.yaml index 8f4849ac5..dc8ea898d 100644 --- a/deployment/helm/node-feature-discovery/templates/topologyupdater.yaml +++ b/deployment/helm/node-feature-discovery/templates/topologyupdater.yaml @@ -55,6 +55,9 @@ spec: - "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key" - "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt" {{- end }} + {{- if .Values.topologyUpdater.podSetFingerprint }} + - "-pods-fingerprint" + {{- end }} volumeMounts: - name: kubelet-config mountPath: /host-var/lib/kubelet/config.yaml diff --git a/deployment/helm/node-feature-discovery/values.yaml b/deployment/helm/node-feature-discovery/values.yaml index 5f2835c24..a44467394 100644 --- a/deployment/helm/node-feature-discovery/values.yaml +++ b/deployment/helm/node-feature-discovery/values.yaml @@ -424,6 +424,7 @@ topologyUpdater: tolerations: [] annotations: {} affinity: {} + podSetFingerprint: true topologyGC: enable: true diff --git a/docs/deployment/helm.md b/docs/deployment/helm.md index 7ee29debc..d28a94304 100644 --- a/docs/deployment/helm.md +++ b/docs/deployment/helm.md @@ -173,6 +173,7 @@ We have introduced the following Chart parameters. | `topologyUpdater.annotations` | dict | {} | Topology updater pod [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) | | `topologyUpdater.affinity` | dict | {} | Topology updater pod [affinity](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/) | | `topologyUpdater.config` | dict | | [configuration](../reference/topology-updater-configuration-reference) | +| `topologyUpdater.podSetFingerprint` | bool | false | Enables compute and report of pod fingerprint in NRT objects. | ### Topology garbage collector parameters diff --git a/docs/reference/topology-updater-commandline-reference.md b/docs/reference/topology-updater-commandline-reference.md index 101824a67..aa67ac665 100644 --- a/docs/reference/topology-updater-commandline-reference.md +++ b/docs/reference/topology-updater-commandline-reference.md @@ -147,3 +147,16 @@ Example: ```bash nfd-topology-updater -podresources-socket=/var/lib/kubelet/pod-resources/kubelet.sock ``` + +### -pods-fingerprint + +Enbles the compute and report the pod set fingerprint in the NRT. +A pod fingerprint is a compact representation of the "node state" regarding resources. + +Default: `false` + +Example: + +```bash +nfd-topology-updater -pods-fingerprint +``` diff --git a/go.mod b/go.mod index c70026e33..7f9f2dae5 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/google/go-cmp v0.5.9 github.com/jaypipes/ghw v0.8.1-0.20210827132705-c7224150a17e github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.0 + github.com/k8stopologyawareschedwg/podfingerprint v0.1.2 github.com/klauspost/cpuid/v2 v2.2.4 github.com/onsi/ginkgo/v2 v2.4.0 github.com/onsi/gomega v1.23.0 @@ -49,6 +50,7 @@ require ( github.com/Microsoft/go-winio v0.4.17 // indirect github.com/Microsoft/hcsshim v0.8.22 // indirect github.com/NYTimes/gziphandler v1.1.1 // indirect + github.com/OneOfOne/xxhash v1.2.8 // indirect github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect github.com/antlr/antlr4/runtime/Go/antlr v1.4.10 // indirect github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e // indirect diff --git a/go.sum b/go.sum index af51371e4..5e21b3f39 100644 --- a/go.sum +++ b/go.sum @@ -86,6 +86,8 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb0 github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8= +github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d h1:G0m3OIz70MZUWq3EgK3CesDbo8upS2Vm9/P3FtgI+Jk= @@ -422,6 +424,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.0 h1:2uCRJbv+A+fmaUaO0wLZ8oYd6cLE1dRzBQcFNxggH3s= github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.0/go.mod h1:AkACMQGiTgCt0lQw3m7TTU8PLH9lYKNK5e9DqFf5VuM= +github.com/k8stopologyawareschedwg/podfingerprint v0.1.2 h1:Db5KLJjPg2mKaCoeEliMlea+JMyDMWdbNPXnWbPNDyM= +github.com/k8stopologyawareschedwg/podfingerprint v0.1.2/go.mod h1:C23pM15t06dXg/OihGlqBvnYzLr+MXDXJ7zMfbNAyXI= github.com/karrick/godirwalk v1.17.0 h1:b4kY7nqDdioR/6qnbHQyDvmA17u5G1cZ6J+CZXwSWoI= github.com/karrick/godirwalk v1.17.0/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= diff --git a/pkg/nfd-topology-updater/nfd-topology-updater.go b/pkg/nfd-topology-updater/nfd-topology-updater.go index 15dfa3582..f72445117 100644 --- a/pkg/nfd-topology-updater/nfd-topology-updater.go +++ b/pkg/nfd-topology-updater/nfd-topology-updater.go @@ -130,7 +130,7 @@ func (w *nfdTopologyUpdater) Run() error { var resScan resourcemonitor.ResourcesScanner - resScan, err = resourcemonitor.NewPodResourcesScanner(w.resourcemonitorArgs.Namespace, podResClient, w.apihelper) + resScan, err = resourcemonitor.NewPodResourcesScanner(w.resourcemonitorArgs.Namespace, podResClient, w.apihelper, w.resourcemonitorArgs.PodSetFingerprint) if err != nil { return fmt.Errorf("failed to initialize ResourceMonitor instance: %w", err) } @@ -163,7 +163,7 @@ func (w *nfdTopologyUpdater) Run() error { zones = resAggr.Aggregate(scanResponse.PodResources) utils.KlogDump(1, "After aggregating resources identified zones are", " ", zones) if !w.args.NoPublish { - if err = w.updateNodeResourceTopology(zones); err != nil { + if err = w.updateNodeResourceTopology(zones, scanResponse); err != nil { return err } } @@ -188,7 +188,7 @@ func (w *nfdTopologyUpdater) Stop() { } } -func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneList) error { +func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneList, scanResponse resourcemonitor.ScanResponse) error { cli, err := w.apihelper.GetTopologyClient() if err != nil { return err @@ -205,6 +205,8 @@ func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneLi Attributes: createTopologyAttributes(w.nodeInfo.tmPolicy, w.nodeInfo.tmScope), } + updateAttributes(&nrtNew.Attributes, scanResponse.Attributes) + _, err := cli.TopologyV1alpha2().NodeResourceTopologies().Create(context.TODO(), &nrtNew, metav1.CreateOptions{}) if err != nil { return fmt.Errorf("failed to create NodeResourceTopology: %w", err) @@ -216,6 +218,7 @@ func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneLi nrtMutated := nrt.DeepCopy() nrtMutated.Zones = zoneInfo + updateAttributes(&nrtMutated.Attributes, scanResponse.Attributes) nrtUpdated, err := cli.TopologyV1alpha2().NodeResourceTopologies().Update(context.TODO(), nrtMutated, metav1.UpdateOptions{}) if err != nil { @@ -261,3 +264,22 @@ func createTopologyAttributes(policy string, scope string) v1alpha2.AttributeLis }, } } + +func updateAttribute(attrList *v1alpha2.AttributeList, attrInfo v1alpha2.AttributeInfo) { + if attrList == nil { + return + } + + for idx := range *attrList { + if (*attrList)[idx].Name == attrInfo.Name { + (*attrList)[idx].Value = attrInfo.Value + return + } + } + *attrList = append(*attrList, attrInfo) +} +func updateAttributes(lhs *v1alpha2.AttributeList, rhs v1alpha2.AttributeList) { + for _, attr := range rhs { + updateAttribute(lhs, attr) + } +} diff --git a/pkg/nfd-topology-updater/nfd-topology-updater_test.go b/pkg/nfd-topology-updater/nfd-topology-updater_test.go new file mode 100644 index 000000000..f81990427 --- /dev/null +++ b/pkg/nfd-topology-updater/nfd-topology-updater_test.go @@ -0,0 +1,112 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nfdtopologyupdater + +import ( + "fmt" + "testing" + + "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" + . "github.com/smartystreets/goconvey/convey" +) + +func TestTopologyUpdater(t *testing.T) { + + Convey("Given a list of Attributes", t, func() { + + attr_two := v1alpha2.AttributeInfo{ + Name: "attr_two_name", + Value: "attr_two_value", + } + + attrList := v1alpha2.AttributeList{ + v1alpha2.AttributeInfo{ + Name: "attr_one_name", + Value: "attr_one_value", + }, + attr_two, + v1alpha2.AttributeInfo{ + Name: "attr_three_name", + Value: "attr_three_value", + }, + } + attrListLen := len(attrList) + attrNames := getListOfNames(attrList) + + Convey("When an existing attribute is updated", func() { + + updatedAttribute := v1alpha2.AttributeInfo{ + Name: attr_two.Name, + Value: attr_two.Value + "_new", + } + updateAttribute(&attrList, updatedAttribute) + + Convey("Then list should have the same number of elements", func() { + So(attrList, ShouldHaveLength, attrListLen) + }) + Convey("Then the order of the elemens should be the same", func() { + So(attrNames, ShouldResemble, getListOfNames(attrList)) + }) + Convey("Then Attribute value in the list should be updated", func() { + attr, err := findAttributeByName(attrList, attr_two.Name) + So(err, ShouldBeNil) + So(attr.Value, ShouldEqual, updatedAttribute.Value) + }) + }) + + Convey("When a non existing attribute is updated", func() { + completelyNewAttribute := v1alpha2.AttributeInfo{ + Name: "NonExistingAttribute_Name", + Value: "NonExistingAttribute_Value", + } + _, err := findAttributeByName(attrList, completelyNewAttribute.Name) + So(err, ShouldNotBeNil) + + updateAttribute(&attrList, completelyNewAttribute) + + Convey("Then list should have the one more element", func() { + So(attrList, ShouldHaveLength, attrListLen+1) + }) + + Convey("Then new Attribute should be added at the end of the list", func() { + So(attrList[len(attrList)-1], ShouldResemble, completelyNewAttribute) + }) + + Convey("Then the order of the elemens should be the same", func() { + So(attrNames, ShouldResemble, getListOfNames(attrList[:len(attrList)-1])) + }) + }) + }) +} + +func getListOfNames(attrList v1alpha2.AttributeList) []string { + ret := make([]string, len(attrList)) + + for idx, attr := range attrList { + ret[idx] = attr.Name + } + return ret +} + +func findAttributeByName(attrList v1alpha2.AttributeList, name string) (v1alpha2.AttributeInfo, error) { + for _, attr := range attrList { + if attr.Name == name { + return attr, nil + } + } + return v1alpha2.AttributeInfo{}, fmt.Errorf("Attribute Not Found name:=%s", name) +} diff --git a/pkg/resourcemonitor/podresourcesscanner.go b/pkg/resourcemonitor/podresourcesscanner.go index 9cfdb4648..00775d9ba 100644 --- a/pkg/resourcemonitor/podresourcesscanner.go +++ b/pkg/resourcemonitor/podresourcesscanner.go @@ -27,20 +27,25 @@ import ( podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1" "sigs.k8s.io/node-feature-discovery/pkg/apihelper" + + "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" + "github.com/k8stopologyawareschedwg/podfingerprint" ) type PodResourcesScanner struct { namespace string podResourceClient podresourcesapi.PodResourcesListerClient apihelper apihelper.APIHelpers + podFingerprint bool } // NewPodResourcesScanner creates a new ResourcesScanner instance -func NewPodResourcesScanner(namespace string, podResourceClient podresourcesapi.PodResourcesListerClient, kubeApihelper apihelper.APIHelpers) (ResourcesScanner, error) { +func NewPodResourcesScanner(namespace string, podResourceClient podresourcesapi.PodResourcesListerClient, kubeApihelper apihelper.APIHelpers, podFingerprint bool) (ResourcesScanner, error) { resourcemonitorInstance := &PodResourcesScanner{ namespace: namespace, podResourceClient: podResourceClient, apihelper: kubeApihelper, + podFingerprint: podFingerprint, } if resourcemonitorInstance.namespace != "*" { klog.Infof("watching namespace %q", resourcemonitorInstance.namespace) @@ -123,9 +128,28 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) { return ScanResponse{}, fmt.Errorf("can't receive response: %v.Get(_) = _, %w", resMon.podResourceClient, err) } + respPodResources := resp.GetPodResources() + retVal := ScanResponse{ + Attributes: v1alpha2.AttributeList{}, + } + + if resMon.podFingerprint && len(respPodResources) > 0 { + var status podfingerprint.Status + podFingerprintSign, err := computePodFingerprint(respPodResources, &status) + if err != nil { + klog.Errorf("podFingerprint: Unable to compute fingerprint %v", err) + } else { + klog.Info("podFingerprint: " + status.Repr()) + + retVal.Attributes = append(retVal.Attributes, v1alpha2.AttributeInfo{ + Name: podfingerprint.Attribute, + Value: podFingerprintSign, + }) + } + } var podResData []PodResources - for _, podResource := range resp.GetPodResources() { + for _, podResource := range respPodResources { klog.Infof("podresource iter: %s", podResource.GetName()) hasDevice := hasDevice(podResource) isWatchable, isIntegralGuaranteed, err := resMon.isWatchable(podResource.GetNamespace(), podResource.GetName(), hasDevice) @@ -198,7 +222,9 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) { } - return ScanResponse{PodResources: podResData}, nil + retVal.PodResources = podResData + + return retVal, nil } func hasDevice(podResource *podresourcesapi.PodResources) bool { @@ -225,3 +251,14 @@ func getNumaNodeIds(topologyInfo *podresourcesapi.TopologyInfo) []int { return topology } + +func computePodFingerprint(podResources []*podresourcesapi.PodResources, status *podfingerprint.Status) (string, error) { + fingerprint := podfingerprint.NewTracingFingerprint(len(podResources), status) + for _, podResource := range podResources { + err := fingerprint.Add(podResource.Namespace, podResource.Name) + if err != nil { + return "", err + } + } + return fingerprint.Sign(), nil +} diff --git a/pkg/resourcemonitor/podresourcesscanner_test.go b/pkg/resourcemonitor/podresourcesscanner_test.go index 132372d63..1a0a3d091 100644 --- a/pkg/resourcemonitor/podresourcesscanner_test.go +++ b/pkg/resourcemonitor/podresourcesscanner_test.go @@ -22,6 +22,7 @@ import ( "sort" "testing" + "github.com/k8stopologyawareschedwg/podfingerprint" . "github.com/smartystreets/goconvey/convey" "github.com/stretchr/testify/mock" @@ -40,11 +41,24 @@ func TestPodScanner(t *testing.T) { var resScan ResourcesScanner var err error + // PodFingerprint only depends on Name/Namespace of the pods running on a Node + // so we can precalculate the expected value + expectedFingerprintCompute := func(pods []*corev1.Pod) (string, error) { + pf := podfingerprint.NewFingerprint(len(pods)) + for _, pr := range pods { + if err := pf.Add(pr.Namespace, pr.Name); err != nil { + return "", err + } + } + return pf.Sign(), nil + } + Convey("When I scan for pod resources using fake client and no namespace", t, func() { mockPodResClient := new(podres.MockPodResourcesListerClient) mockAPIHelper := new(apihelper.MockAPIHelpers) mockClient := &k8sclient.Clientset{} - resScan, err = NewPodResourcesScanner("*", mockPodResClient, mockAPIHelper) + computePodFingerprint := true + resScan, err = NewPodResourcesScanner("*", mockPodResClient, mockAPIHelper, computePodFingerprint) Convey("Creating a Resources Scanner using a mock client", func() { So(err, ShouldBeNil) @@ -60,6 +74,9 @@ func TestPodScanner(t *testing.T) { Convey("Return PodResources should be nil", func() { So(res.PodResources, ShouldBeNil) }) + Convey("Return Attributes should be empty", func() { + So(res.Attributes, ShouldBeEmpty) + }) }) Convey("When I successfully get empty response", func() { @@ -72,6 +89,9 @@ func TestPodScanner(t *testing.T) { Convey("Return PodResources should be zero", func() { So(len(res.PodResources), ShouldEqual, 0) }) + Convey("Return Attributes should be empty", func() { + So(res.Attributes, ShouldBeEmpty) + }) }) Convey("When I successfully get valid response", func() { @@ -203,6 +223,14 @@ func TestPodScanner(t *testing.T) { } So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) + Convey("Return Attributes should have pod fingerprint attribute with proper value", func() { + So(len(res.Attributes), ShouldEqual, 1) + // can compute expected fringerprint only with the list of pods in the node. + expectedFingerprint, err := expectedFingerprintCompute([]*corev1.Pod{pod}) + So(err, ShouldBeNil) + So(res.Attributes[0].Name, ShouldEqual, podfingerprint.Attribute) + So(res.Attributes[0].Value, ShouldEqual, expectedFingerprint) + }) }) Convey("When I successfully get valid response without topology", func() { @@ -292,6 +320,14 @@ func TestPodScanner(t *testing.T) { So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) + Convey("Return Attributes should have pod fingerprint attribute with proper value", func() { + So(len(res.Attributes), ShouldEqual, 1) + // can compute expected fringerprint only with the list of pods in the node. + expectedFingerprint, err := expectedFingerprintCompute([]*corev1.Pod{pod}) + So(err, ShouldBeNil) + So(res.Attributes[0].Name, ShouldEqual, podfingerprint.Attribute) + So(res.Attributes[0].Value, ShouldEqual, expectedFingerprint) + }) }) Convey("When I successfully get valid response without devices", func() { @@ -367,6 +403,14 @@ func TestPodScanner(t *testing.T) { So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) + Convey("Return Attributes should have pod fingerprint attribute with proper value", func() { + So(len(res.Attributes), ShouldEqual, 1) + // can compute expected fringerprint only with the list of pods in the node. + expectedFingerprint, err := expectedFingerprintCompute([]*corev1.Pod{pod}) + So(err, ShouldBeNil) + So(res.Attributes[0].Name, ShouldEqual, podfingerprint.Attribute) + So(res.Attributes[0].Value, ShouldEqual, expectedFingerprint) + }) }) Convey("When I successfully get valid response without cpus", func() { @@ -507,6 +551,14 @@ func TestPodScanner(t *testing.T) { Convey("Return PodResources should have values", func() { So(len(res.PodResources), ShouldBeGreaterThan, 0) }) + Convey("Return Attributes should have pod fingerprint attribute with proper value", func() { + So(len(res.Attributes), ShouldEqual, 1) + // can compute expected fringerprint only with the list of pods in the node. + expectedFingerprint, err := expectedFingerprintCompute([]*corev1.Pod{pod}) + So(err, ShouldBeNil) + So(res.Attributes[0].Name, ShouldEqual, podfingerprint.Attribute) + So(res.Attributes[0].Value, ShouldEqual, expectedFingerprint) + }) expected := []PodResources{ { @@ -610,15 +662,25 @@ func TestPodScanner(t *testing.T) { }, } So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) - }) + Convey("Return Attributes should have pod fingerprint attribute with proper value", func() { + So(len(res.Attributes), ShouldEqual, 1) + + // can compute expected fringerprint only with the list of pods in the node. + expectedFingerprint, err := expectedFingerprintCompute([]*corev1.Pod{pod}) + So(err, ShouldBeNil) + So(res.Attributes[0].Name, ShouldEqual, podfingerprint.Attribute) + So(res.Attributes[0].Value, ShouldEqual, expectedFingerprint) + }) + }) }) Convey("When I scan for pod resources using fake client and given namespace", t, func() { mockPodResClient := new(podres.MockPodResourcesListerClient) mockAPIHelper := new(apihelper.MockAPIHelpers) mockClient := &k8sclient.Clientset{} - resScan, err = NewPodResourcesScanner("pod-res-test", mockPodResClient, mockAPIHelper) + computePodFingerprint := false + resScan, err = NewPodResourcesScanner("pod-res-test", mockPodResClient, mockAPIHelper, computePodFingerprint) Convey("Creating a Resources Scanner using a mock client", func() { So(err, ShouldBeNil) diff --git a/pkg/resourcemonitor/types.go b/pkg/resourcemonitor/types.go index 6385cb251..96589da13 100644 --- a/pkg/resourcemonitor/types.go +++ b/pkg/resourcemonitor/types.go @@ -31,6 +31,7 @@ type Args struct { Namespace string KubeletConfigURI string APIAuthTokenFile string + PodSetFingerprint bool } // ResourceInfo stores information of resources and their corresponding IDs obtained from PodResource API @@ -55,6 +56,7 @@ type PodResources struct { type ScanResponse struct { PodResources []PodResources + Attributes topologyv1alpha2.AttributeList } // ResourcesScanner gathers all the PodResources from the system, using the podresources API client From b65015027f3c241b6bc7be2e0086e4a70d63798e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Luis=20Ojosnegros=20Manch=C3=B3n?= Date: Fri, 10 Feb 2023 12:52:44 +0100 Subject: [PATCH 3/3] topology-updater: e2e test for podFingerprint --- test/e2e/topology_updater_test.go | 67 +++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/test/e2e/topology_updater_test.go b/test/e2e/topology_updater_test.go index 53fc83ebf..94da9dbaa 100644 --- a/test/e2e/topology_updater_test.go +++ b/test/e2e/topology_updater_test.go @@ -29,6 +29,7 @@ import ( "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" topologyclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned" + "github.com/k8stopologyawareschedwg/podfingerprint" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -304,8 +305,74 @@ excludeList: }, 1*time.Minute, 10*time.Second).Should(BeFalse()) }) }) + When("topology-updater configure to compute pod fingerprint", func() { + BeforeEach(func() { + cfg, err := testutils.GetConfig() + Expect(err).ToNot(HaveOccurred()) + + kcfg := cfg.GetKubeletConfig() + By(fmt.Sprintf("Using config (%#v)", kcfg)) + + podSpecOpts := []testpod.SpecOption{ + testpod.SpecWithContainerImage(dockerImage()), + testpod.SpecWithContainerExtraArgs("-pods-fingerprint"), + } + topologyUpdaterDaemonSet = testds.NFDTopologyUpdater(kcfg, podSpecOpts...) + }) + It("noderesourcetopology should advertise pod fingerprint in top-level attribute", func() { + Eventually(func() bool { + // get node topology + nodeTopology := testutils.GetNodeTopology(topologyClient, topologyUpdaterNode.Name) + + // look for attribute + podFingerprintAttribute, err := findAttribute(nodeTopology.Attributes, podfingerprint.Attribute) + if err != nil { + framework.Logf("podFingerprint attributte %q not found: %v", podfingerprint.Attribute, err) + return false + } + // get pods in node + pods, err := f.ClientSet.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{FieldSelector: "spec.nodeName=" + topologyUpdaterNode.Name}) + if err != nil { + framework.Logf("podFingerprint error while recovering %q node pods: %v", topologyUpdaterNode.Name, err) + return false + } + if len(pods.Items) == 0 { + framework.Logf("podFingerprint No pods in node %q", topologyUpdaterNode.Name) + return false + } + + // compute expected value + pf := podfingerprint.NewFingerprint(len(pods.Items)) + for _, pod := range pods.Items { + err = pf.Add(pod.Namespace, pod.Name) + if err != nil { + framework.Logf("error while computing expected podFingerprint %v", err) + return false + } + } + expectedPodFingerprint := pf.Sign() + + if podFingerprintAttribute.Value != expectedPodFingerprint { + framework.Logf("podFingerprint attributte error expected: %q actual: %q", expectedPodFingerprint, podFingerprintAttribute.Value) + return false + } + + return true + + }, 1*time.Minute, 10*time.Second).Should(BeTrue()) + }) + }) }) +func findAttribute(attributes v1alpha2.AttributeList, attributeName string) (v1alpha2.AttributeInfo, error) { + for _, attrInfo := range attributes { + if attrInfo.Name == attributeName { + return attrInfo, nil + } + } + return v1alpha2.AttributeInfo{}, fmt.Errorf("attribute %q not found", attributeName) +} + // lessAllocatableResources specialize CompareAllocatableResources for this specific e2e use case. func lessAllocatableResources(expected, got map[string]corev1.ResourceList) (string, string, bool) { zoneName, resName, cmp, ok := testutils.CompareAllocatableResources(expected, got)