
Merge pull request #949 from k8stopologyawareschedwg/exclude_list

topology-updater: introduce exclude-list
Authored by Kubernetes Prow Robot on 2022-11-21 23:26:13 -08:00, committed by GitHub
commit 592d6c67d0
23 changed files with 456 additions and 57 deletions


@ -119,12 +119,20 @@ templates:
@# Need to prepend each line in the sample config with spaces in order to
@# fit correctly in the configmap spec.
@sed s'/^/ /' deployment/components/worker-config/nfd-worker.conf.example > nfd-worker.conf.tmp
@sed s'/^/ /' deployment/components/topology-updater-config/nfd-topology-updater.conf.example > nfd-topology-updater.conf.tmp
@# The sed magic below replaces the block of text between the lines with start and end markers
@start=NFD-WORKER-CONF-START-DO-NOT-REMOVE; \
end=NFD-WORKER-CONF-END-DO-NOT-REMOVE; \
sed -e "/$$start/,/$$end/{ /$$start/{ p; r nfd-worker.conf.tmp" \
-e "}; /$$end/p; d }" -i deployment/helm/node-feature-discovery/values.yaml
@start=NFD-TOPOLOGY-UPDATER-CONF-START-DO-NOT-REMOVE; \
end=NFD-TOPOLOGY-UPDATER-CONF-END-DO-NOT-REMOVE; \
sed -e "/$$start/,/$$end/{ /$$start/{ p; r nfd-topology-updater.conf.tmp" \
-e "}; /$$end/p; d }" -i deployment/helm/node-feature-discovery/values.yaml
@rm nfd-worker.conf.tmp
@rm nfd-topology-updater.conf.tmp

.generator.image.stamp: Dockerfile_generator
$(IMAGE_BUILD_CMD) \


@ -155,6 +155,8 @@ func initFlags(flagset *flag.FlagSet) (*topology.Args, *resourcemonitor.Args) {
"NFD server address to connecto to.") "NFD server address to connecto to.")
flagset.StringVar(&args.ServerNameOverride, "server-name-override", "", flagset.StringVar(&args.ServerNameOverride, "server-name-override", "",
"Hostname expected from server certificate, useful in testing") "Hostname expected from server certificate, useful in testing")
flagset.StringVar(&args.ConfigFile, "config", "/etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf",
"Config file to use.")
klog.InitFlags(flagset) klog.InitFlags(flagset)


@ -34,19 +34,22 @@ func TestArgsParse(t *testing.T) {
Convey("noPublish is set and args.sources is set to the default value", func() { Convey("noPublish is set and args.sources is set to the default value", func() {
So(args.NoPublish, ShouldBeTrue) So(args.NoPublish, ShouldBeTrue)
So(args.Oneshot, ShouldBeTrue) So(args.Oneshot, ShouldBeTrue)
So(args.ConfigFile, ShouldEqual, "/etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf")
So(finderArgs.SleepInterval, ShouldEqual, 60*time.Second) So(finderArgs.SleepInterval, ShouldEqual, 60*time.Second)
So(finderArgs.PodResourceSocketPath, ShouldEqual, "/var/lib/kubelet/pod-resources/kubelet.sock") So(finderArgs.PodResourceSocketPath, ShouldEqual, "/var/lib/kubelet/pod-resources/kubelet.sock")
}) })
}) })
Convey("When valid args are specified for -kubelet-config-url and -sleep-interval,", func() { Convey("When valid args are specified for -kubelet-config-url, -sleep-interval and -config,", func() {
args, finderArgs := parseArgs(flags, args, finderArgs := parseArgs(flags,
"-kubelet-config-uri=file:///path/testconfig.yaml", "-kubelet-config-uri=file:///path/testconfig.yaml",
"-sleep-interval=30s") "-sleep-interval=30s",
"-config=/path/nfd-topology-updater.conf")
Convey("args.sources is set to appropriate values", func() { Convey("args.sources is set to appropriate values", func() {
So(args.NoPublish, ShouldBeFalse) So(args.NoPublish, ShouldBeFalse)
So(args.Oneshot, ShouldBeFalse) So(args.Oneshot, ShouldBeFalse)
So(args.ConfigFile, ShouldEqual, "/path/nfd-topology-updater.conf")
So(finderArgs.SleepInterval, ShouldEqual, 30*time.Second) So(finderArgs.SleepInterval, ShouldEqual, 30*time.Second)
So(finderArgs.KubeletConfigURI, ShouldEqual, "file:///path/testconfig.yaml") So(finderArgs.KubeletConfigURI, ShouldEqual, "file:///path/testconfig.yaml")
So(finderArgs.PodResourceSocketPath, ShouldEqual, "/var/lib/kubelet/pod-resources/kubelet.sock") So(finderArgs.PodResourceSocketPath, ShouldEqual, "/var/lib/kubelet/pod-resources/kubelet.sock")


@ -0,0 +1,10 @@
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- files:
- nfd-topology-updater.conf=nfd-topology-updater.conf.example
name: nfd-topology-updater-conf


@ -0,0 +1,7 @@
## key = node name, value = list of resources to be excluded.
## use * to exclude from all nodes.
## an example for how the exclude list should looks like
#excludeList:
# node1: [cpu]
# node2: [memory, example/deviceA]
# *: [hugepages-2Mi]


@ -10,6 +10,9 @@
- name: kubelet-podresources-sock
hostPath:
path: /var/lib/kubelet/pod-resources/kubelet.sock
- name: nfd-topology-updater-conf
configMap:
name: nfd-topology-updater-conf
- op: add
path: /spec/template/spec/containers/0/volumeMounts
@ -20,6 +23,9 @@
mountPath: /host-var/lib/kubelet/pod-resources/kubelet.sock
- name: host-sys
mountPath: /host-sys
- name: nfd-topology-updater-conf
mountPath: "/etc/kubernetes/node-feature-discovery"
readOnly: true
- op: add
path: /spec/template/spec/containers/0/args/-


@ -0,0 +1,10 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "node-feature-discovery.fullname" . }}-topology-updater-conf
namespace: {{ include "node-feature-discovery.namespace" . }}
labels:
{{- include "node-feature-discovery.labels" . | nindent 4 }}
data:
nfd-topology-updater.conf: |-
{{- .Values.topologyUpdater.config | toYaml | nindent 4 }}


@ -68,6 +68,9 @@ spec:
mountPath: "/etc/kubernetes/node-feature-discovery/certs" mountPath: "/etc/kubernetes/node-feature-discovery/certs"
readOnly: true readOnly: true
{{- end }} {{- end }}
- name: topology-updater-conf
mountPath: "/etc/kubernetes/node-feature-discovery"
readOnly: true
resources: resources:
{{- toYaml .Values.topologyUpdater.resources | nindent 12 }} {{- toYaml .Values.topologyUpdater.resources | nindent 12 }}
@ -91,12 +94,19 @@ spec:
{{- else }} {{- else }}
path: /var/lib/kubelet/pod-resources/kubelet.sock path: /var/lib/kubelet/pod-resources/kubelet.sock
{{- end }} {{- end }}
- name: nfd-topology-updater-conf
configMap:
name: {{ include "node-feature-discovery.fullname" . }}-topology-updater-conf
items:
- key: nfd-topology-updater.conf
path: nfd-topology-updater.conf
{{- if .Values.tls.enable }} {{- if .Values.tls.enable }}
- name: nfd-topology-updater-cert - name: nfd-topology-updater-cert
secret: secret:
secretName: nfd-topology-updater-cert secretName: nfd-topology-updater-cert
{{- end }} {{- end }}
{{- with .Values.topologyUpdater.nodeSelector }} {{- with .Values.topologyUpdater.nodeSelector }}
nodeSelector: nodeSelector:
{{- toYaml . | nindent 8 }} {{- toYaml . | nindent 8 }}


@ -368,6 +368,16 @@ worker:
priorityClassName: ""

topologyUpdater:
config: ### <NFD-TOPOLOGY-UPDATER-CONF-START-DO-NOT-REMOVE>
## key = node name, value = list of resources to be excluded.
## use * to exclude from all nodes.
## an example for how the exclude list should looks like
#excludeList:
# node1: [cpu]
# node2: [memory, example/deviceA]
# *: [hugepages-2Mi]
### <NFD-TOPOLOGY-UPDATER-CONF-END-DO-NOT-REMOVE>
enable: false
createCRDs: false


@ -19,3 +19,4 @@ components:
- ../../components/worker-config
- ../../components/common
- ../../components/topology-updater
- ../../components/topology-updater-config


@ -16,3 +16,4 @@ resources:
components:
- ../../components/common
- ../../components/topology-updater
- ../../components/topology-updater-config


@ -142,24 +142,25 @@ We have introduced the following Chart parameters.
### Topology updater parameters

| Name | Type | Default | description |
|------|------|---------|-------------|
| `topologyUpdater.*` | dict | | NFD Topology Updater configuration |
| `topologyUpdater.enable` | bool | false | Specifies whether the NFD Topology Updater should be created |
| `topologyUpdater.createCRDs` | bool | false | Specifies whether the NFD Topology Updater CRDs should be created |
| `topologyUpdater.serviceAccount.create` | bool | true | Specifies whether the service account for topology updater should be created |
| `topologyUpdater.serviceAccount.annotations` | dict | {} | Annotations to add to the service account for topology updater |
| `topologyUpdater.serviceAccount.name` | string | | The name of the service account for topology updater to use. If not set and create is true, a name is generated using the fullname template and `-topology-updater` suffix |
| `topologyUpdater.rbac` | dict | | RBAC [parameters](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) for the topology updater |
| `topologyUpdater.rbac.create` | bool | false | Specifies whether the cluster role and binding for topology updater should be created |
| `topologyUpdater.kubeletConfigPath` | string | "" | Specifies the kubelet config host path |
| `topologyUpdater.kubeletPodResourcesSockPath` | string | "" | Specifies the kubelet sock path to read pod resources |
| `topologyUpdater.updateInterval` | string | 60s | Time to sleep between CR updates. Non-positive value implies no CR update. |
| `topologyUpdater.watchNamespace` | string | `*` | Namespace to watch pods, `*` for all namespaces |
| `topologyUpdater.podSecurityContext` | dict | {} | [PodSecurityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) holds pod-level security attributes and common container settings |
| `topologyUpdater.securityContext` | dict | {} | Container [security settings](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) |
| `topologyUpdater.resources` | dict | {} | Topology updater pod [resources management](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) |
| `topologyUpdater.nodeSelector` | dict | {} | Topology updater pod [node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) |
| `topologyUpdater.tolerations` | dict | {} | Topology updater pod [node tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) |
| `topologyUpdater.annotations` | dict | {} | Topology updater pod [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) |
| `topologyUpdater.affinity` | dict | {} | Topology updater pod [affinity](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/) |
| `topologyUpdater.config` | dict | | [configuration](../reference/topology-updater-configuration-reference) |
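
As an editorial aside (not part of the diff): a rough sketch of how the new `topologyUpdater.config` parameter could be set in a Helm values override, using the `excludeList` format from the sample configuration file introduced by this PR. The node and resource names are illustrative only, not defaults:

```yaml
# Illustrative values override; node names and resources are example values
topologyUpdater:
  enable: true
  config:
    excludeList:
      node1: [cpu]
      '*': [hugepages-2Mi]
```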


@ -21,7 +21,8 @@ To quickly view available command line flags execute `nfd-topology-updater -help
In a docker container:

```bash
-docker run gcr.io/k8s-staging-nfd/node-feature-discovery:master nfd-topology-updater -help
+docker run {{ site.container_image }} \
+nfd-topology-updater -help
```

### -h, -help

@ -32,6 +33,19 @@ Print usage and exit.

Print version and exit.

### -config

The `-config` flag specifies the path of the nfd-topology-updater
configuration file to use.

Default: /etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf

Example:

```bash
nfd-topology-updater -config=/opt/nfd/nfd-topology-updater.conf
```

### -server

The `-server` flag specifies the address of the nfd-master endpoint where to


@ -0,0 +1,52 @@
---
title: "Topology-Updater config reference"
layout: default
sort: 5
---
# Configuration file reference of nfd-topology-updater
{: .no_toc}
## Table of contents
{: .no_toc .text-delta}
1. TOC
{:toc}
---
See the
[sample configuration file](https://github.com/kubernetes-sigs/node-feature-discovery/blob/{{site.release}}/deployment/components/topology-updater-config/nfd-topology-updater.conf.example)
for a full example configuration.
## excludeList
The `excludeList` option specifies a per-node map of allocated resources
that should not be examined by the topology-updater agent.
Each key is a node name, and its value is a list of resources
that the agent should not examine on that specific node.
Default: *empty*
Example:
```yaml
excludeList:
nodeA: [hugepages-2Mi]
nodeB: [memory]
nodeC: [cpu, hugepages-2Mi]
```
### excludeList.*
`excludeList.*` is a special key that applies to all nodes.
A resource listed under this key is excluded from all nodes.
Default: *empty*
Example:
```yaml
excludeList:
'*': [hugepages-2Mi]
```


@ -20,5 +20,42 @@ When run as a daemonset, nodes are re-examined for the allocated resources
(to determine the information of the allocatable resources on a per zone basis
where a zone can be a NUMA node) at an interval specified using the
[`-sleep-interval`](../reference/topology-updater-commandline-reference.html#-sleep-interval)
option. The default sleep interval is set to 60s which is the value when no
-sleep-interval is specified.
In addition, the agent can skip examining specific allocated resources
by listing them in the [`excludeList`](../reference/topology-updater-configuration-reference.md#excludelist)
configuration option.
## Topology-Updater Configuration
NFD-Topology-Updater supports configuration through a configuration file. The
default location is `/etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf`,
but this can be changed by specifying the `-config` command line flag.
> NOTE: unlike nfd-worker,
> dynamic configuration updates are not currently supported.
The nfd-topology-updater configuration file is read inside the container,
so Volumes and VolumeMounts are needed
to make your configuration available to NFD.
The preferred method is to use a ConfigMap
which provides easy deployment and re-configurability.
The provided nfd-topology-updater deployment templates
create an empty configmap
and mount it inside the nfd-topology-updater containers.
In kustomize deployments, configuration can be edited with:
```bash
kubectl -n ${NFD_NS} edit configmap nfd-topology-updater-conf
```
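
For illustration only (not part of the diff), the edited ConfigMap could look roughly like the following. The namespace is an assumed example (your `${NFD_NS}`), while the ConfigMap name and the `nfd-topology-updater.conf` data key match the kustomize component and deployment templates added in this PR:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: nfd-topology-updater-conf
  namespace: node-feature-discovery   # assumed example namespace (${NFD_NS})
data:
  nfd-topology-updater.conf: |
    excludeList:
      '*': [hugepages-2Mi]
```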
In Helm deployments,
[Topology Updater parameters](../deployment/helm.md#topology-updater-parameters)
`topologyUpdater.config` can be used to edit the respective configuration.
See
[nfd-topology-updater configuration file reference](../reference/topology-updater-configuration-reference.md)
for more details.
The (empty-by-default)
[example config](https://github.com/kubernetes-sigs/node-feature-discovery/blob/{{site.release}}/deployment/components/topology-updater-config/nfd-topology-updater.conf.example)
contains all available configuration options and can be used as a reference
for creating a configuration.


@ -62,7 +62,7 @@ for more details.
The (empty-by-default)
[example config](https://github.com/kubernetes-sigs/node-feature-discovery/blob/{{site.release}}/deployment/components/worker-config/nfd-worker.conf.example)
contains all available configuration options and can be used as a reference
-for creating creating a configuration.
+for creating a configuration.

Configuration options can also be specified via the `-options` command line
flag, in which case no mounts need to be used. The same format as in the config


@ -18,6 +18,8 @@ package topologyupdater
import (
"fmt"
"os"
"path/filepath"
"time"

"k8s.io/klog/v2"
@ -32,6 +34,7 @@ import (
pb "sigs.k8s.io/node-feature-discovery/pkg/topologyupdater"
"sigs.k8s.io/node-feature-discovery/pkg/utils"
"sigs.k8s.io/node-feature-discovery/pkg/version"
"sigs.k8s.io/yaml"
)

// Args are the command line arguments
@ -40,6 +43,12 @@ type Args struct {
NoPublish bool
Oneshot bool
KubeConfigFile string
ConfigFile string
}

// NFDConfig contains the configuration settings of NFDTopologyUpdater.
type NFDConfig struct {
ExcludeList map[string][]string
}

type NfdTopologyUpdater interface {
@ -59,6 +68,8 @@ type nfdTopologyUpdater struct {
certWatch *utils.FsWatcher
client pb.NodeTopologyClient
stop chan struct{} // channel for signaling stop
configFilePath string
config *NFDConfig
}

// NewTopologyUpdater creates a new NfdTopologyUpdater instance.
@ -75,7 +86,11 @@ func NewTopologyUpdater(args Args, resourcemonitorArgs resourcemonitor.Args, pol
nodeInfo: &staticNodeInfo{
tmPolicy: policy,
},
stop: make(chan struct{}, 1),
config: &NFDConfig{},
}
if args.ConfigFile != "" {
nfd.configFilePath = filepath.Clean(args.ConfigFile)
}
return nfd, nil
}
@ -99,6 +114,9 @@ func (w *nfdTopologyUpdater) Run() error {
}
kubeApihelper = apihelper.K8sHelpers{Kubeconfig: kubeconfig}
}
if err := w.configure(); err != nil {
return fmt.Errorf("faild to configure Node Feature Discovery Topology Updater: %w", err)
}

var resScan resourcemonitor.ResourcesScanner

@ -113,7 +131,8 @@ func (w *nfdTopologyUpdater) Run() error {
// zonesChannel := make(chan v1alpha1.ZoneList)
var zones v1alpha1.ZoneList

-resAggr, err := resourcemonitor.NewResourcesAggregator(podResClient)
+excludeList := resourcemonitor.NewExcludeResourceList(w.config.ExcludeList, nfdclient.NodeName())
+resAggr, err := resourcemonitor.NewResourcesAggregator(podResClient, excludeList)
if err != nil {
return fmt.Errorf("failed to obtain node resource information: %w", err)
}
@ -245,3 +264,27 @@ func advertiseNodeTopology(client pb.NodeTopologyClient, zoneInfo v1alpha1.ZoneL
return nil
}
func (w *nfdTopologyUpdater) configure() error {
if w.configFilePath == "" {
klog.Warningf("file path for nfd-topology-updater conf file is empty")
return nil
}
b, err := os.ReadFile(w.configFilePath)
if err != nil {
// config is optional
if os.IsNotExist(err) {
klog.Warningf("couldn't find conf file under %v", w.configFilePath)
return nil
}
return err
}
err = yaml.Unmarshal(b, w.config)
if err != nil {
return fmt.Errorf("failed to parse configuration file %q: %w", w.configFilePath, err)
}
klog.Infof("configuration file %q parsed:\n %v", w.configFilePath, w.config)
return nil
}


@ -0,0 +1,33 @@
package resourcemonitor
import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
)
// ExcludeResourceList contains a list of resources to ignore during resources scan
type ExcludeResourceList struct {
excludeList sets.String
}
// NewExcludeResourceList returns new ExcludeList with values with set.String types
func NewExcludeResourceList(resMap map[string][]string, nodeName string) ExcludeResourceList {
excludeList := make(sets.String)
for k, v := range resMap {
if k == nodeName || k == "*" {
excludeList.Insert(v...)
}
}
return ExcludeResourceList{
excludeList: excludeList,
}
}
func (rl *ExcludeResourceList) IsExcluded(resource corev1.ResourceName) bool {
if rl.excludeList.Has(string(resource)) {
klog.V(5).InfoS("resource excluded", "resource", resource)
return true
}
return false
}


@ -0,0 +1,70 @@
package resourcemonitor
import (
"testing"
corev1 "k8s.io/api/core/v1"
)
const (
cpu = string(corev1.ResourceCPU)
memory = string(corev1.ResourceMemory)
hugepages2Mi = "hugepages-2Mi"
nicResourceName = "vendor/nic1"
)
func TestNewExcludeResourceList(t *testing.T) {
tests := []struct {
desc string
excludeListConfig map[string][]string
nodeName string
expectedExcludedResources []string
}{
{
desc: "exclude list with multiple nodes",
excludeListConfig: map[string][]string{
"node1": {
cpu,
nicResourceName,
},
"node2": {
memory,
hugepages2Mi,
},
},
nodeName: "node1",
expectedExcludedResources: []string{cpu, nicResourceName},
},
{
desc: "exclude list with wild card",
excludeListConfig: map[string][]string{
"*": {
memory, nicResourceName,
},
"node1": {
cpu,
hugepages2Mi,
},
},
nodeName: "node2",
expectedExcludedResources: []string{memory, nicResourceName},
},
{
desc: "empty exclude list",
excludeListConfig: map[string][]string{},
nodeName: "node1",
expectedExcludedResources: []string{},
},
}
for _, tt := range tests {
t.Logf("test %s", tt.desc)
excludeList := NewExcludeResourceList(tt.excludeListConfig, tt.nodeName)
for _, res := range tt.expectedExcludedResources {
if !excludeList.IsExcluded(corev1.ResourceName(res)) {
t.Errorf("resource: %q expected to be excluded from node: %q", res, tt.nodeName)
}
}
}
}


@ -28,8 +28,8 @@ import (
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
"k8s.io/klog/v2" "k8s.io/klog/v2"
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
"sigs.k8s.io/node-feature-discovery/pkg/utils" "sigs.k8s.io/node-feature-discovery/pkg/utils"
"sigs.k8s.io/node-feature-discovery/pkg/utils/hostpath" "sigs.k8s.io/node-feature-discovery/pkg/utils/hostpath"
) )
@ -46,6 +46,7 @@ type nodeResources struct {
topo *ghw.TopologyInfo topo *ghw.TopologyInfo
reservedCPUIDPerNUMA map[int][]string reservedCPUIDPerNUMA map[int][]string
memoryResourcesCapacityPerNUMA utils.NumaMemoryResources memoryResourcesCapacityPerNUMA utils.NumaMemoryResources
excludeList ExcludeResourceList
} }
type resourceData struct { type resourceData struct {
@ -54,7 +55,7 @@ type resourceData struct {
capacity int64 capacity int64
} }
func NewResourcesAggregator(podResourceClient podresourcesapi.PodResourcesListerClient) (ResourcesAggregator, error) { func NewResourcesAggregator(podResourceClient podresourcesapi.PodResourcesListerClient, excludeList ExcludeResourceList) (ResourcesAggregator, error) {
var err error var err error
topo, err := ghw.Topology(ghw.WithPathOverrides(ghw.PathOverrides{ topo, err := ghw.Topology(ghw.WithPathOverrides(ghw.PathOverrides{
@ -85,11 +86,11 @@ func NewResourcesAggregator(podResourceClient podresourcesapi.PodResourcesLister
return nil, fmt.Errorf("failed to get allocatable resources (ensure that KubeletPodResourcesGetAllocatable feature gate is enabled): %w", err) return nil, fmt.Errorf("failed to get allocatable resources (ensure that KubeletPodResourcesGetAllocatable feature gate is enabled): %w", err)
} }
return NewResourcesAggregatorFromData(topo, resp, memoryResourcesCapacityPerNUMA), nil return NewResourcesAggregatorFromData(topo, resp, memoryResourcesCapacityPerNUMA, excludeList), nil
} }
// NewResourcesAggregatorFromData is used to aggregate resource information based on the received data from underlying hardware and podresource API // NewResourcesAggregatorFromData is used to aggregate resource information based on the received data from underlying hardware and podresource API
func NewResourcesAggregatorFromData(topo *ghw.TopologyInfo, resp *podresourcesapi.AllocatableResourcesResponse, memoryResourceCapacity utils.NumaMemoryResources) ResourcesAggregator { func NewResourcesAggregatorFromData(topo *ghw.TopologyInfo, resp *podresourcesapi.AllocatableResourcesResponse, memoryResourceCapacity utils.NumaMemoryResources, excludeList ExcludeResourceList) ResourcesAggregator {
allDevs := getContainerDevicesFromAllocatableResources(resp, topo) allDevs := getContainerDevicesFromAllocatableResources(resp, topo)
return &nodeResources{ return &nodeResources{
topo: topo, topo: topo,
@ -97,6 +98,7 @@ func NewResourcesAggregatorFromData(topo *ghw.TopologyInfo, resp *podresourcesap
perNUMAAllocatable: makeNodeAllocatable(allDevs, resp.GetMemory()), perNUMAAllocatable: makeNodeAllocatable(allDevs, resp.GetMemory()),
reservedCPUIDPerNUMA: makeReservedCPUMap(topo.Nodes, allDevs), reservedCPUIDPerNUMA: makeReservedCPUMap(topo.Nodes, allDevs),
memoryResourcesCapacityPerNUMA: memoryResourceCapacity, memoryResourcesCapacityPerNUMA: memoryResourceCapacity,
excludeList: excludeList,
} }
} }
@ -108,6 +110,9 @@ func (noderesourceData *nodeResources) Aggregate(podResData []PodResources) topo
if ok { if ok {
perNuma[nodeID] = make(map[corev1.ResourceName]*resourceData) perNuma[nodeID] = make(map[corev1.ResourceName]*resourceData)
for resName, allocatable := range nodeRes { for resName, allocatable := range nodeRes {
if noderesourceData.excludeList.IsExcluded(resName) {
continue
}
switch { switch {
case resName == "cpu": case resName == "cpu":
perNuma[nodeID][resName] = &resourceData{ perNuma[nodeID][resName] = &resourceData{


@ -178,7 +178,7 @@ func TestResourcesAggregator(t *testing.T) {
corev1.ResourceName("hugepages-2Mi"): 2048, corev1.ResourceName("hugepages-2Mi"): 2048,
}, },
} }
resAggr = NewResourcesAggregatorFromData(&fakeTopo, availRes, memoryResourcesCapacity) resAggr = NewResourcesAggregatorFromData(&fakeTopo, availRes, memoryResourcesCapacity, NewExcludeResourceList(map[string][]string{}, ""))
Convey("When aggregating resources", func() { Convey("When aggregating resources", func() {
expected := topologyv1alpha1.ZoneList{ expected := topologyv1alpha1.ZoneList{
@ -376,7 +376,7 @@ func TestResourcesAggregator(t *testing.T) {
}, },
} }
resAggr = NewResourcesAggregatorFromData(&fakeTopo, availRes, memoryResourcesCapacity) resAggr = NewResourcesAggregatorFromData(&fakeTopo, availRes, memoryResourcesCapacity, NewExcludeResourceList(map[string][]string{}, ""))
Convey("When aggregating resources", func() { Convey("When aggregating resources", func() {
podRes := []PodResources{ podRes := []PodResources{


@ -27,6 +27,7 @@ import (
"github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1" "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
topologyclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned" topologyclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
extclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" extclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -42,16 +43,17 @@ import (
var _ = SIGDescribe("Node Feature Discovery topology updater", func() {
var (
extClient *extclient.Clientset
topologyClient *topologyclientset.Clientset
topologyUpdaterNode *corev1.Node
topologyUpdaterDaemonSet *appsv1.DaemonSet
workerNodes []corev1.Node
kubeletConfig *kubeletconfig.KubeletConfiguration
)

f := framework.NewDefaultFramework("node-topology-updater")
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged

-BeforeEach(func() {
+JustBeforeEach(func() {
var err error

if extClient == nil {
@ -64,18 +66,10 @@ var _ = SIGDescribe("Node Feature Discovery topology updater", func() {
Expect(err).NotTo(HaveOccurred())
}

-cfg, err := testutils.GetConfig()
-Expect(err).ToNot(HaveOccurred())
-kcfg := cfg.GetKubeletConfig()
-By(fmt.Sprintf("Using config (%#v)", kcfg))

By("Creating the node resource topologies CRD")
-_, err = testutils.CreateNodeResourceTopologies(extClient)
-Expect(err).NotTo(HaveOccurred())
+Expect(testutils.CreateNodeResourceTopologies(extClient)).ToNot(BeNil())

-err = testutils.ConfigureRBAC(f.ClientSet, f.Namespace.Name)
-Expect(err).NotTo(HaveOccurred())
+Expect(testutils.ConfigureRBAC(f.ClientSet, f.Namespace.Name)).NotTo(HaveOccurred())

image := fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag)
f.PodClient().CreateSync(testutils.NFDMasterPod(image, false))
@ -88,7 +82,6 @@ var _ = SIGDescribe("Node Feature Discovery topology updater", func() {
Expect(e2enetwork.WaitForService(f.ClientSet, f.Namespace.Name, masterService.Name, true, time.Second, 10*time.Second)).NotTo(HaveOccurred())

By("Creating nfd-topology-updater daemonset")
-topologyUpdaterDaemonSet := testutils.NFDTopologyUpdaterDaemonSet(kcfg, fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag), []string{})
topologyUpdaterDaemonSet, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(context.TODO(), topologyUpdaterDaemonSet, metav1.CreateOptions{})
Expect(err).NotTo(HaveOccurred())
@ -110,7 +103,25 @@ var _ = SIGDescribe("Node Feature Discovery topology updater", func() {
Expect(err).NotTo(HaveOccurred())
})
// TODO: replace with regular AfterEach once we have https://github.com/kubernetes/kubernetes/pull/111998 in
f.AddAfterEach("Node Feature Discovery topology updater CRD and RBAC removal", func(f *framework.Framework, failed bool) {
err := testutils.DeconfigureRBAC(f.ClientSet, f.Namespace.Name)
if err != nil {
framework.Logf("failed to delete RBAC resources: %v", err)
}
})
Context("with single nfd-master pod", func() { Context("with single nfd-master pod", func() {
BeforeEach(func() {
cfg, err := testutils.GetConfig()
Expect(err).ToNot(HaveOccurred())
kcfg := cfg.GetKubeletConfig()
By(fmt.Sprintf("Using config (%#v)", kcfg))
topologyUpdaterDaemonSet = testutils.NFDTopologyUpdaterDaemonSet(kcfg, fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag), []string{})
})
It("should fill the node resource topologies CR with the data", func() { It("should fill the node resource topologies CR with the data", func() {
nodeTopology := testutils.GetNodeTopology(topologyClient, topologyUpdaterNode.Name) nodeTopology := testutils.GetNodeTopology(topologyClient, topologyUpdaterNode.Name)
isValid := testutils.IsValidNodeTopology(nodeTopology, kubeletConfig) isValid := testutils.IsValidNodeTopology(nodeTopology, kubeletConfig)
@ -247,11 +258,50 @@ var _ = SIGDescribe("Node Feature Discovery topology updater", func() {
})

-JustAfterEach(func() {
-err := testutils.DeconfigureRBAC(f.ClientSet, f.Namespace.Name)
-if err != nil {
-framework.Logf("failed to delete RBAC resources: %v", err)
-}
When("topology-updater configure to exclude memory", func() {
var topologyUpdaterConfigMap *corev1.ConfigMap

BeforeEach(func() {
data := make(map[string]string)
data["nfd-topology-updater.conf"] = `excludeList:
'*': [memory]
`
topologyUpdaterConfigMap = &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "nfd-topology-updater-conf",
},
Data: data,
}
cm, err := f.ClientSet.CoreV1().ConfigMaps(f.Namespace.Name).Create(context.TODO(), topologyUpdaterConfigMap, metav1.CreateOptions{})
Expect(err).ToNot(HaveOccurred())
cfg, err := testutils.GetConfig()
Expect(err).ToNot(HaveOccurred())
kcfg := cfg.GetKubeletConfig()
By(fmt.Sprintf("Using config (%#v)", kcfg))
opts := testutils.SpecWithConfigMap(cm.Name, cm.Name, "/etc/kubernetes/node-feature-discovery")
topologyUpdaterDaemonSet = testutils.NFDTopologyUpdaterDaemonSet(kcfg, fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag), []string{}, opts)
})
It("noderesourcetopology should not advertise the memory resource", func() {
Eventually(func() bool {
memoryFound := false
nodeTopology := testutils.GetNodeTopology(topologyClient, topologyUpdaterNode.Name)
for _, zone := range nodeTopology.Zones {
for _, res := range zone.Resources {
if res.Name == string(corev1.ResourceMemory) {
memoryFound = true
framework.Logf("resource:%s was found for nodeTopology:%s on zone:%s while it should not", corev1.ResourceMemory, nodeTopology.Name, zone.Name)
break
}
}
}
return memoryFound
}, 1*time.Minute, 10*time.Second).Should(BeFalse())
})
})
})


@ -176,11 +176,37 @@ func NFDWorkerDaemonSet(image string, extraArgs []string) *appsv1.DaemonSet {
}

// NFDTopologyUpdaterDaemonSet provides the NFD daemon set topology updater
-func NFDTopologyUpdaterDaemonSet(kc KubeletConfig, image string, extraArgs []string) *appsv1.DaemonSet {
+func NFDTopologyUpdaterDaemonSet(kc KubeletConfig, image string, extraArgs []string, options ...func(spec *corev1.PodSpec)) *appsv1.DaemonSet {
podSpec := nfdTopologyUpdaterPodSpec(kc, image, extraArgs)
for _, o := range options {
o(podSpec)
}
return newDaemonSet("nfd-topology-updater", podSpec)
}
func SpecWithConfigMap(cmName, volumeName, mountPath string) func(spec *corev1.PodSpec) {
return func(spec *corev1.PodSpec) {
spec.Volumes = append(spec.Volumes,
corev1.Volume{
Name: volumeName,
VolumeSource: corev1.VolumeSource{
ConfigMap: &corev1.ConfigMapVolumeSource{
LocalObjectReference: corev1.LocalObjectReference{
Name: cmName,
},
},
},
})
cnt := &spec.Containers[0]
cnt.VolumeMounts = append(cnt.VolumeMounts,
corev1.VolumeMount{
Name: volumeName,
ReadOnly: true,
MountPath: mountPath,
})
}
}
// newDaemonSet provide the new daemon set
func newDaemonSet(name string, podSpec *corev1.PodSpec) *appsv1.DaemonSet {
return &appsv1.DaemonSet{