1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2024-12-14 11:57:51 +00:00

topology-updater:compute pod set fingerprint

Add an option to compute the fingerprint of the current pod set on each
node.

Report this new fingerprint using an attribute in NRT object.
This commit is contained in:
Jose Luis Ojosnegros Manchón 2023-02-01 12:41:09 +01:00
parent 1a687cb286
commit b340d112a8
12 changed files with 269 additions and 9 deletions

View file

@ -139,6 +139,7 @@ func initFlags(flagset *flag.FlagSet) (*topology.Args, *resourcemonitor.Args) {
"Pod Resource Socket path to use.")
flagset.StringVar(&args.ConfigFile, "config", "/etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf",
"Config file to use.")
flagset.BoolVar(&resourcemonitorArgs.PodSetFingerprint, "pods-fingerprint", false, "Compute and report the pod set fingerprint")
klog.InitFlags(flagset)

View file

@ -55,6 +55,9 @@ spec:
- "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
- "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
{{- end }}
{{- if .Values.topologyUpdater.podSetFingerprint }}
- "-pods-fingerprint"
{{- end }}
volumeMounts:
- name: kubelet-config
mountPath: /host-var/lib/kubelet/config.yaml

View file

@ -424,6 +424,7 @@ topologyUpdater:
tolerations: []
annotations: {}
affinity: {}
podSetFingerprint: true
topologyGC:
enable: true

View file

@ -173,6 +173,7 @@ We have introduced the following Chart parameters.
| `topologyUpdater.annotations` | dict | {} | Topology updater pod [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) |
| `topologyUpdater.affinity` | dict | {} | Topology updater pod [affinity](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/) |
| `topologyUpdater.config` | dict | | [configuration](../reference/topology-updater-configuration-reference) |
| `topologyUpdater.podSetFingerprint` | bool | false | Enables compute and report of pod fingerprint in NRT objects. |
### Topology garbage collector parameters

View file

@ -147,3 +147,16 @@ Example:
```bash
nfd-topology-updater -podresources-socket=/var/lib/kubelet/pod-resources/kubelet.sock
```
### -pods-fingerprint
Enbles the compute and report the pod set fingerprint in the NRT.
A pod fingerprint is a compact representation of the "node state" regarding resources.
Default: `false`
Example:
```bash
nfd-topology-updater -pods-fingerprint
```

2
go.mod
View file

@ -9,6 +9,7 @@ require (
github.com/google/go-cmp v0.5.9
github.com/jaypipes/ghw v0.8.1-0.20210827132705-c7224150a17e
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.0
github.com/k8stopologyawareschedwg/podfingerprint v0.1.2
github.com/klauspost/cpuid/v2 v2.2.4
github.com/onsi/ginkgo/v2 v2.4.0
github.com/onsi/gomega v1.23.0
@ -49,6 +50,7 @@ require (
github.com/Microsoft/go-winio v0.4.17 // indirect
github.com/Microsoft/hcsshim v0.8.22 // indirect
github.com/NYTimes/gziphandler v1.1.1 // indirect
github.com/OneOfOne/xxhash v1.2.8 // indirect
github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect
github.com/antlr/antlr4/runtime/Go/antlr v1.4.10 // indirect
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e // indirect

4
go.sum
View file

@ -86,6 +86,8 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb0
github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I=
github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8=
github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d h1:G0m3OIz70MZUWq3EgK3CesDbo8upS2Vm9/P3FtgI+Jk=
@ -422,6 +424,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.0 h1:2uCRJbv+A+fmaUaO0wLZ8oYd6cLE1dRzBQcFNxggH3s=
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.0/go.mod h1:AkACMQGiTgCt0lQw3m7TTU8PLH9lYKNK5e9DqFf5VuM=
github.com/k8stopologyawareschedwg/podfingerprint v0.1.2 h1:Db5KLJjPg2mKaCoeEliMlea+JMyDMWdbNPXnWbPNDyM=
github.com/k8stopologyawareschedwg/podfingerprint v0.1.2/go.mod h1:C23pM15t06dXg/OihGlqBvnYzLr+MXDXJ7zMfbNAyXI=
github.com/karrick/godirwalk v1.17.0 h1:b4kY7nqDdioR/6qnbHQyDvmA17u5G1cZ6J+CZXwSWoI=
github.com/karrick/godirwalk v1.17.0/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=

View file

@ -130,7 +130,7 @@ func (w *nfdTopologyUpdater) Run() error {
var resScan resourcemonitor.ResourcesScanner
resScan, err = resourcemonitor.NewPodResourcesScanner(w.resourcemonitorArgs.Namespace, podResClient, w.apihelper)
resScan, err = resourcemonitor.NewPodResourcesScanner(w.resourcemonitorArgs.Namespace, podResClient, w.apihelper, w.resourcemonitorArgs.PodSetFingerprint)
if err != nil {
return fmt.Errorf("failed to initialize ResourceMonitor instance: %w", err)
}
@ -163,7 +163,7 @@ func (w *nfdTopologyUpdater) Run() error {
zones = resAggr.Aggregate(scanResponse.PodResources)
utils.KlogDump(1, "After aggregating resources identified zones are", " ", zones)
if !w.args.NoPublish {
if err = w.updateNodeResourceTopology(zones); err != nil {
if err = w.updateNodeResourceTopology(zones, scanResponse); err != nil {
return err
}
}
@ -188,7 +188,7 @@ func (w *nfdTopologyUpdater) Stop() {
}
}
func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneList) error {
func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneList, scanResponse resourcemonitor.ScanResponse) error {
cli, err := w.apihelper.GetTopologyClient()
if err != nil {
return err
@ -205,6 +205,8 @@ func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneLi
Attributes: createTopologyAttributes(w.nodeInfo.tmPolicy, w.nodeInfo.tmScope),
}
updateAttributes(&nrtNew.Attributes, scanResponse.Attributes)
_, err := cli.TopologyV1alpha2().NodeResourceTopologies().Create(context.TODO(), &nrtNew, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("failed to create NodeResourceTopology: %w", err)
@ -216,6 +218,7 @@ func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha2.ZoneLi
nrtMutated := nrt.DeepCopy()
nrtMutated.Zones = zoneInfo
updateAttributes(&nrtMutated.Attributes, scanResponse.Attributes)
nrtUpdated, err := cli.TopologyV1alpha2().NodeResourceTopologies().Update(context.TODO(), nrtMutated, metav1.UpdateOptions{})
if err != nil {
@ -261,3 +264,22 @@ func createTopologyAttributes(policy string, scope string) v1alpha2.AttributeLis
},
}
}
func updateAttribute(attrList *v1alpha2.AttributeList, attrInfo v1alpha2.AttributeInfo) {
if attrList == nil {
return
}
for idx := range *attrList {
if (*attrList)[idx].Name == attrInfo.Name {
(*attrList)[idx].Value = attrInfo.Value
return
}
}
*attrList = append(*attrList, attrInfo)
}
func updateAttributes(lhs *v1alpha2.AttributeList, rhs v1alpha2.AttributeList) {
for _, attr := range rhs {
updateAttribute(lhs, attr)
}
}

View file

@ -0,0 +1,112 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nfdtopologyupdater
import (
"fmt"
"testing"
"github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
. "github.com/smartystreets/goconvey/convey"
)
func TestTopologyUpdater(t *testing.T) {
Convey("Given a list of Attributes", t, func() {
attr_two := v1alpha2.AttributeInfo{
Name: "attr_two_name",
Value: "attr_two_value",
}
attrList := v1alpha2.AttributeList{
v1alpha2.AttributeInfo{
Name: "attr_one_name",
Value: "attr_one_value",
},
attr_two,
v1alpha2.AttributeInfo{
Name: "attr_three_name",
Value: "attr_three_value",
},
}
attrListLen := len(attrList)
attrNames := getListOfNames(attrList)
Convey("When an existing attribute is updated", func() {
updatedAttribute := v1alpha2.AttributeInfo{
Name: attr_two.Name,
Value: attr_two.Value + "_new",
}
updateAttribute(&attrList, updatedAttribute)
Convey("Then list should have the same number of elements", func() {
So(attrList, ShouldHaveLength, attrListLen)
})
Convey("Then the order of the elemens should be the same", func() {
So(attrNames, ShouldResemble, getListOfNames(attrList))
})
Convey("Then Attribute value in the list should be updated", func() {
attr, err := findAttributeByName(attrList, attr_two.Name)
So(err, ShouldBeNil)
So(attr.Value, ShouldEqual, updatedAttribute.Value)
})
})
Convey("When a non existing attribute is updated", func() {
completelyNewAttribute := v1alpha2.AttributeInfo{
Name: "NonExistingAttribute_Name",
Value: "NonExistingAttribute_Value",
}
_, err := findAttributeByName(attrList, completelyNewAttribute.Name)
So(err, ShouldNotBeNil)
updateAttribute(&attrList, completelyNewAttribute)
Convey("Then list should have the one more element", func() {
So(attrList, ShouldHaveLength, attrListLen+1)
})
Convey("Then new Attribute should be added at the end of the list", func() {
So(attrList[len(attrList)-1], ShouldResemble, completelyNewAttribute)
})
Convey("Then the order of the elemens should be the same", func() {
So(attrNames, ShouldResemble, getListOfNames(attrList[:len(attrList)-1]))
})
})
})
}
func getListOfNames(attrList v1alpha2.AttributeList) []string {
ret := make([]string, len(attrList))
for idx, attr := range attrList {
ret[idx] = attr.Name
}
return ret
}
func findAttributeByName(attrList v1alpha2.AttributeList, name string) (v1alpha2.AttributeInfo, error) {
for _, attr := range attrList {
if attr.Name == name {
return attr, nil
}
}
return v1alpha2.AttributeInfo{}, fmt.Errorf("Attribute Not Found name:=%s", name)
}

View file

@ -27,20 +27,25 @@ import (
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
"sigs.k8s.io/node-feature-discovery/pkg/apihelper"
"github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
"github.com/k8stopologyawareschedwg/podfingerprint"
)
type PodResourcesScanner struct {
namespace string
podResourceClient podresourcesapi.PodResourcesListerClient
apihelper apihelper.APIHelpers
podFingerprint bool
}
// NewPodResourcesScanner creates a new ResourcesScanner instance
func NewPodResourcesScanner(namespace string, podResourceClient podresourcesapi.PodResourcesListerClient, kubeApihelper apihelper.APIHelpers) (ResourcesScanner, error) {
func NewPodResourcesScanner(namespace string, podResourceClient podresourcesapi.PodResourcesListerClient, kubeApihelper apihelper.APIHelpers, podFingerprint bool) (ResourcesScanner, error) {
resourcemonitorInstance := &PodResourcesScanner{
namespace: namespace,
podResourceClient: podResourceClient,
apihelper: kubeApihelper,
podFingerprint: podFingerprint,
}
if resourcemonitorInstance.namespace != "*" {
klog.Infof("watching namespace %q", resourcemonitorInstance.namespace)
@ -123,9 +128,28 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) {
return ScanResponse{}, fmt.Errorf("can't receive response: %v.Get(_) = _, %w", resMon.podResourceClient, err)
}
respPodResources := resp.GetPodResources()
retVal := ScanResponse{
Attributes: v1alpha2.AttributeList{},
}
if resMon.podFingerprint && len(respPodResources) > 0 {
var status podfingerprint.Status
podFingerprintSign, err := computePodFingerprint(respPodResources, &status)
if err != nil {
klog.Errorf("podFingerprint: Unable to compute fingerprint %v", err)
} else {
klog.Info("podFingerprint: " + status.Repr())
retVal.Attributes = append(retVal.Attributes, v1alpha2.AttributeInfo{
Name: podfingerprint.Attribute,
Value: podFingerprintSign,
})
}
}
var podResData []PodResources
for _, podResource := range resp.GetPodResources() {
for _, podResource := range respPodResources {
klog.Infof("podresource iter: %s", podResource.GetName())
hasDevice := hasDevice(podResource)
isWatchable, isIntegralGuaranteed, err := resMon.isWatchable(podResource.GetNamespace(), podResource.GetName(), hasDevice)
@ -198,7 +222,9 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) {
}
return ScanResponse{PodResources: podResData}, nil
retVal.PodResources = podResData
return retVal, nil
}
func hasDevice(podResource *podresourcesapi.PodResources) bool {
@ -225,3 +251,14 @@ func getNumaNodeIds(topologyInfo *podresourcesapi.TopologyInfo) []int {
return topology
}
func computePodFingerprint(podResources []*podresourcesapi.PodResources, status *podfingerprint.Status) (string, error) {
fingerprint := podfingerprint.NewTracingFingerprint(len(podResources), status)
for _, podResource := range podResources {
err := fingerprint.Add(podResource.Namespace, podResource.Name)
if err != nil {
return "", err
}
}
return fingerprint.Sign(), nil
}

View file

@ -22,6 +22,7 @@ import (
"sort"
"testing"
"github.com/k8stopologyawareschedwg/podfingerprint"
. "github.com/smartystreets/goconvey/convey"
"github.com/stretchr/testify/mock"
@ -40,11 +41,24 @@ func TestPodScanner(t *testing.T) {
var resScan ResourcesScanner
var err error
// PodFingerprint only depends on Name/Namespace of the pods running on a Node
// so we can precalculate the expected value
expectedFingerprintCompute := func(pods []*corev1.Pod) (string, error) {
pf := podfingerprint.NewFingerprint(len(pods))
for _, pr := range pods {
if err := pf.Add(pr.Namespace, pr.Name); err != nil {
return "", err
}
}
return pf.Sign(), nil
}
Convey("When I scan for pod resources using fake client and no namespace", t, func() {
mockPodResClient := new(podres.MockPodResourcesListerClient)
mockAPIHelper := new(apihelper.MockAPIHelpers)
mockClient := &k8sclient.Clientset{}
resScan, err = NewPodResourcesScanner("*", mockPodResClient, mockAPIHelper)
computePodFingerprint := true
resScan, err = NewPodResourcesScanner("*", mockPodResClient, mockAPIHelper, computePodFingerprint)
Convey("Creating a Resources Scanner using a mock client", func() {
So(err, ShouldBeNil)
@ -60,6 +74,9 @@ func TestPodScanner(t *testing.T) {
Convey("Return PodResources should be nil", func() {
So(res.PodResources, ShouldBeNil)
})
Convey("Return Attributes should be empty", func() {
So(res.Attributes, ShouldBeEmpty)
})
})
Convey("When I successfully get empty response", func() {
@ -72,6 +89,9 @@ func TestPodScanner(t *testing.T) {
Convey("Return PodResources should be zero", func() {
So(len(res.PodResources), ShouldEqual, 0)
})
Convey("Return Attributes should be empty", func() {
So(res.Attributes, ShouldBeEmpty)
})
})
Convey("When I successfully get valid response", func() {
@ -203,6 +223,14 @@ func TestPodScanner(t *testing.T) {
}
So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue)
})
Convey("Return Attributes should have pod fingerprint attribute with proper value", func() {
So(len(res.Attributes), ShouldEqual, 1)
// can compute expected fringerprint only with the list of pods in the node.
expectedFingerprint, err := expectedFingerprintCompute([]*corev1.Pod{pod})
So(err, ShouldBeNil)
So(res.Attributes[0].Name, ShouldEqual, podfingerprint.Attribute)
So(res.Attributes[0].Value, ShouldEqual, expectedFingerprint)
})
})
Convey("When I successfully get valid response without topology", func() {
@ -292,6 +320,14 @@ func TestPodScanner(t *testing.T) {
So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue)
})
Convey("Return Attributes should have pod fingerprint attribute with proper value", func() {
So(len(res.Attributes), ShouldEqual, 1)
// can compute expected fringerprint only with the list of pods in the node.
expectedFingerprint, err := expectedFingerprintCompute([]*corev1.Pod{pod})
So(err, ShouldBeNil)
So(res.Attributes[0].Name, ShouldEqual, podfingerprint.Attribute)
So(res.Attributes[0].Value, ShouldEqual, expectedFingerprint)
})
})
Convey("When I successfully get valid response without devices", func() {
@ -367,6 +403,14 @@ func TestPodScanner(t *testing.T) {
So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue)
})
Convey("Return Attributes should have pod fingerprint attribute with proper value", func() {
So(len(res.Attributes), ShouldEqual, 1)
// can compute expected fringerprint only with the list of pods in the node.
expectedFingerprint, err := expectedFingerprintCompute([]*corev1.Pod{pod})
So(err, ShouldBeNil)
So(res.Attributes[0].Name, ShouldEqual, podfingerprint.Attribute)
So(res.Attributes[0].Value, ShouldEqual, expectedFingerprint)
})
})
Convey("When I successfully get valid response without cpus", func() {
@ -507,6 +551,14 @@ func TestPodScanner(t *testing.T) {
Convey("Return PodResources should have values", func() {
So(len(res.PodResources), ShouldBeGreaterThan, 0)
})
Convey("Return Attributes should have pod fingerprint attribute with proper value", func() {
So(len(res.Attributes), ShouldEqual, 1)
// can compute expected fringerprint only with the list of pods in the node.
expectedFingerprint, err := expectedFingerprintCompute([]*corev1.Pod{pod})
So(err, ShouldBeNil)
So(res.Attributes[0].Name, ShouldEqual, podfingerprint.Attribute)
So(res.Attributes[0].Value, ShouldEqual, expectedFingerprint)
})
expected := []PodResources{
{
@ -610,15 +662,25 @@ func TestPodScanner(t *testing.T) {
},
}
So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue)
})
Convey("Return Attributes should have pod fingerprint attribute with proper value", func() {
So(len(res.Attributes), ShouldEqual, 1)
// can compute expected fringerprint only with the list of pods in the node.
expectedFingerprint, err := expectedFingerprintCompute([]*corev1.Pod{pod})
So(err, ShouldBeNil)
So(res.Attributes[0].Name, ShouldEqual, podfingerprint.Attribute)
So(res.Attributes[0].Value, ShouldEqual, expectedFingerprint)
})
})
})
Convey("When I scan for pod resources using fake client and given namespace", t, func() {
mockPodResClient := new(podres.MockPodResourcesListerClient)
mockAPIHelper := new(apihelper.MockAPIHelpers)
mockClient := &k8sclient.Clientset{}
resScan, err = NewPodResourcesScanner("pod-res-test", mockPodResClient, mockAPIHelper)
computePodFingerprint := false
resScan, err = NewPodResourcesScanner("pod-res-test", mockPodResClient, mockAPIHelper, computePodFingerprint)
Convey("Creating a Resources Scanner using a mock client", func() {
So(err, ShouldBeNil)

View file

@ -31,6 +31,7 @@ type Args struct {
Namespace string
KubeletConfigURI string
APIAuthTokenFile string
PodSetFingerprint bool
}
// ResourceInfo stores information of resources and their corresponding IDs obtained from PodResource API
@ -55,6 +56,7 @@ type PodResources struct {
type ScanResponse struct {
PodResources []PodResources
Attributes topologyv1alpha2.AttributeList
}
// ResourcesScanner gathers all the PodResources from the system, using the podresources API client