Merge pull request #949 from k8stopologyawareschedwg/exclude_list

topology-updater: introduce exclude-list

commit 592d6c67d0
23 changed files with 456 additions and 57 deletions

Makefile (8 changes)
@@ -119,12 +119,20 @@ templates:
	@# Need to prepend each line in the sample config with spaces in order to
	@# fit correctly in the configmap spec.
	@sed s'/^/ /' deployment/components/worker-config/nfd-worker.conf.example > nfd-worker.conf.tmp
	@sed s'/^/ /' deployment/components/topology-updater-config/nfd-topology-updater.conf.example > nfd-topology-updater.conf.tmp
	@# The sed magic below replaces the block of text between the lines with start and end markers
	@start=NFD-WORKER-CONF-START-DO-NOT-REMOVE; \
	end=NFD-WORKER-CONF-END-DO-NOT-REMOVE; \
	sed -e "/$$start/,/$$end/{ /$$start/{ p; r nfd-worker.conf.tmp" \
	    -e "}; /$$end/p; d }" -i deployment/helm/node-feature-discovery/values.yaml
	@start=NFD-TOPOLOGY-UPDATER-CONF-START-DO-NOT-REMOVE; \
	end=NFD-TOPOLOGY-UPDATER-CONF-END-DO-NOT-REMOVE; \
	sed -e "/$$start/,/$$end/{ /$$start/{ p; r nfd-topology-updater.conf.tmp" \
	    -e "}; /$$end/p; d }" -i deployment/helm/node-feature-discovery/values.yaml
	@rm nfd-worker.conf.tmp
	@rm nfd-topology-updater.conf.tmp

.generator.image.stamp: Dockerfile_generator
	$(IMAGE_BUILD_CMD) \
@@ -155,6 +155,8 @@ func initFlags(flagset *flag.FlagSet) (*topology.Args, *resourcemonitor.Args) {
		"NFD server address to connect to.")
	flagset.StringVar(&args.ServerNameOverride, "server-name-override", "",
		"Hostname expected from server certificate, useful in testing")
	flagset.StringVar(&args.ConfigFile, "config", "/etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf",
		"Config file to use.")

	klog.InitFlags(flagset)
@@ -34,19 +34,22 @@ func TestArgsParse(t *testing.T) {
		Convey("noPublish is set and args.sources is set to the default value", func() {
			So(args.NoPublish, ShouldBeTrue)
			So(args.Oneshot, ShouldBeTrue)
			So(args.ConfigFile, ShouldEqual, "/etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf")
			So(finderArgs.SleepInterval, ShouldEqual, 60*time.Second)
			So(finderArgs.PodResourceSocketPath, ShouldEqual, "/var/lib/kubelet/pod-resources/kubelet.sock")
		})
	})

	Convey("When valid args are specified for -kubelet-config-uri, -sleep-interval and -config,", func() {
		args, finderArgs := parseArgs(flags,
			"-kubelet-config-uri=file:///path/testconfig.yaml",
			"-sleep-interval=30s",
			"-config=/path/nfd-topology-updater.conf")

		Convey("args.sources is set to appropriate values", func() {
			So(args.NoPublish, ShouldBeFalse)
			So(args.Oneshot, ShouldBeFalse)
			So(args.ConfigFile, ShouldEqual, "/path/nfd-topology-updater.conf")
			So(finderArgs.SleepInterval, ShouldEqual, 30*time.Second)
			So(finderArgs.KubeletConfigURI, ShouldEqual, "file:///path/testconfig.yaml")
			So(finderArgs.PodResourceSocketPath, ShouldEqual, "/var/lib/kubelet/pod-resources/kubelet.sock")
@@ -0,0 +1,10 @@
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

generatorOptions:
  disableNameSuffixHash: true

configMapGenerator:
- files:
  - nfd-topology-updater.conf=nfd-topology-updater.conf.example
  name: nfd-topology-updater-conf
@@ -0,0 +1,7 @@
## key = node name, value = list of resources to be excluded.
## use * to exclude from all nodes.
## an example of how the exclude list should look:
#excludeList:
#  node1: [cpu]
#  node2: [memory, example/deviceA]
#  *: [hugepages-2Mi]
@@ -10,6 +10,9 @@
    - name: kubelet-podresources-sock
      hostPath:
        path: /var/lib/kubelet/pod-resources/kubelet.sock
    - name: nfd-topology-updater-conf
      configMap:
        name: nfd-topology-updater-conf

- op: add
  path: /spec/template/spec/containers/0/volumeMounts

@@ -20,6 +23,9 @@
      mountPath: /host-var/lib/kubelet/pod-resources/kubelet.sock
    - name: host-sys
      mountPath: /host-sys
    - name: nfd-topology-updater-conf
      mountPath: "/etc/kubernetes/node-feature-discovery"
      readOnly: true

- op: add
  path: /spec/template/spec/containers/0/args/-
@@ -0,0 +1,10 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-updater-conf
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
data:
  nfd-topology-updater.conf: |-
    {{- .Values.topologyUpdater.config | toYaml | nindent 4 }}
@@ -68,6 +68,9 @@ spec:
            mountPath: "/etc/kubernetes/node-feature-discovery/certs"
            readOnly: true
{{- end }}
          - name: nfd-topology-updater-conf
            mountPath: "/etc/kubernetes/node-feature-discovery"
            readOnly: true

        resources:
          {{- toYaml .Values.topologyUpdater.resources | nindent 12 }}

@@ -91,12 +94,19 @@ spec:
{{- else }}
            path: /var/lib/kubelet/pod-resources/kubelet.sock
{{- end }}
        - name: nfd-topology-updater-conf
          configMap:
            name: {{ include "node-feature-discovery.fullname" . }}-topology-updater-conf
            items:
              - key: nfd-topology-updater.conf
                path: nfd-topology-updater.conf
{{- if .Values.tls.enable }}
        - name: nfd-topology-updater-cert
          secret:
            secretName: nfd-topology-updater-cert
{{- end }}

{{- with .Values.topologyUpdater.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
@@ -368,6 +368,16 @@ worker:
  priorityClassName: ""

topologyUpdater:
  config: ### <NFD-TOPOLOGY-UPDATER-CONF-START-DO-NOT-REMOVE>
    ## key = node name, value = list of resources to be excluded.
    ## use * to exclude from all nodes.
    ## an example of how the exclude list should look:
    #excludeList:
    #  node1: [cpu]
    #  node2: [memory, example/deviceA]
    #  *: [hugepages-2Mi]
  ### <NFD-TOPOLOGY-UPDATER-CONF-END-DO-NOT-REMOVE>

  enable: false
  createCRDs: false
@ -19,3 +19,4 @@ components:
|
|||
- ../../components/worker-config
|
||||
- ../../components/common
|
||||
- ../../components/topology-updater
|
||||
- ../../components/topology-updater-config
|
||||
|
|
|
@ -16,3 +16,4 @@ resources:
|
|||
components:
|
||||
- ../../components/common
|
||||
- ../../components/topology-updater
|
||||
- ../../components/topology-updater-config
|
||||
|
|
|
@@ -142,24 +142,25 @@ We have introduced the following Chart parameters.

### Topology updater parameters

| Name | Type | Default | description |
|------|------|---------|-------------|
| `topologyUpdater.*` | dict | | NFD Topology Updater configuration |
| `topologyUpdater.enable` | bool | false | Specifies whether the NFD Topology Updater should be created |
| `topologyUpdater.createCRDs` | bool | false | Specifies whether the NFD Topology Updater CRDs should be created |
| `topologyUpdater.serviceAccount.create` | bool | true | Specifies whether the service account for topology updater should be created |
| `topologyUpdater.serviceAccount.annotations` | dict | {} | Annotations to add to the service account for topology updater |
| `topologyUpdater.serviceAccount.name` | string | | The name of the service account for topology updater to use. If not set and create is true, a name is generated using the fullname template and `-topology-updater` suffix |
| `topologyUpdater.rbac` | dict | | RBAC [parameters](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) for the topology updater |
| `topologyUpdater.rbac.create` | bool | false | Specifies whether the cluster role and binding for topology updater should be created |
| `topologyUpdater.kubeletConfigPath` | string | "" | Specifies the kubelet config host path |
| `topologyUpdater.kubeletPodResourcesSockPath` | string | "" | Specifies the kubelet sock path to read pod resources |
| `topologyUpdater.updateInterval` | string | 60s | Time to sleep between CR updates. A non-positive value implies no CR update. |
| `topologyUpdater.watchNamespace` | string | `*` | Namespace to watch pods, `*` for all namespaces |
| `topologyUpdater.podSecurityContext` | dict | {} | [PodSecurityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) holds pod-level security attributes and common container settings |
| `topologyUpdater.securityContext` | dict | {} | Container [security settings](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) |
| `topologyUpdater.resources` | dict | {} | Topology updater pod [resources management](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) |
| `topologyUpdater.nodeSelector` | dict | {} | Topology updater pod [node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) |
| `topologyUpdater.tolerations` | dict | {} | Topology updater pod [node tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) |
| `topologyUpdater.annotations` | dict | {} | Topology updater pod [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) |
| `topologyUpdater.affinity` | dict | {} | Topology updater pod [affinity](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/) |
| `topologyUpdater.config` | dict | | [configuration](../reference/topology-updater-configuration-reference) |
@@ -21,7 +21,8 @@ To quickly view available command line flags execute `nfd-topology-updater -help`
In a docker container:

```bash
docker run {{ site.container_image }} \
  nfd-topology-updater -help
```

### -h, -help

@@ -32,6 +33,19 @@ Print usage and exit.

Print version and exit.

### -config

The `-config` flag specifies the path of the nfd-topology-updater
configuration file to use.

Default: /etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf

Example:

```bash
nfd-topology-updater -config=/opt/nfd/nfd-topology-updater.conf
```

### -server

The `-server` flag specifies the address of the nfd-master endpoint where to
docs/reference/topology-updater-configuration-reference.md (new file, 52 lines)

@@ -0,0 +1,52 @@
---
title: "Topology-Updater config reference"
layout: default
sort: 5
---

# Configuration file reference of nfd-topology-updater
{: .no_toc}

## Table of contents
{: .no_toc .text-delta}

1. TOC
{:toc}

---

See the
[sample configuration file](https://github.com/kubernetes-sigs/node-feature-discovery/blob/{{site.release}}/deployment/components/topology-updater-config/nfd-topology-updater.conf.example)
for a full example configuration.

## excludeList

The `excludeList` option specifies a key-value map of allocated resources
that the topology-updater agent should not examine.
Each key is a node name, and its value is a list of resources
that should not be examined by the agent on that specific node.

Default: *empty*

Example:

```yaml
excludeList:
  nodeA: [hugepages-2Mi]
  nodeB: [memory]
  nodeC: [cpu, hugepages-2Mi]
```

### excludeList.*

`excludeList.*` is a special key used to specify all nodes.
A resource listed under this key will be excluded from all nodes.

Default: *empty*

Example:

```yaml
excludeList:
  '*': [hugepages-2Mi]
```
@@ -20,5 +20,42 @@ When run as a daemonset, nodes are re-examined for the allocated resources
(to determine the information of the allocatable resources on a per zone basis
where a zone can be a NUMA node) at an interval specified using the
[`-sleep-interval`](../reference/topology-updater-commandline-reference.html#-sleep-interval)
option. The default sleep interval is set to 60s, which is the value used when no
-sleep-interval is specified.
In addition, specific allocated resources can be excluded from examination by
listing them in the
[`excludeList`](../reference/topology-updater-configuration-reference.md#excludelist)
configuration option.

## Topology-Updater Configuration

NFD-Topology-Updater supports configuration through a configuration file. The
default location is `/etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf`,
but this can be changed by specifying the `-config` command line flag.

> NOTE: unlike nfd-worker, dynamic configuration updates are not currently supported.

The topology-updater configuration file is read inside the container,
so volumes and volume mounts are needed
to make your configuration available to NFD.
The preferred method is to use a ConfigMap,
which provides easy deployment and re-configurability.

The provided nfd-topology-updater deployment templates
create an empty configmap
and mount it inside the nfd-topology-updater containers.
In kustomize deployments, the configuration can be edited with:

```bash
kubectl -n ${NFD_NS} edit configmap nfd-topology-updater-conf
```

In Helm deployments, the
[Topology Updater parameter](../deployment/helm.md#topology-updater-parameters)
`topologyUpdater.config` can be used to edit the respective configuration.

See the
[nfd-topology-updater configuration file reference](../reference/topology-updater-configuration-reference.md)
for more details.
The (empty-by-default)
[example config](https://github.com/kubernetes-sigs/node-feature-discovery/blob/{{site.release}}/deployment/components/topology-updater-config/nfd-topology-updater.conf.example)
contains all available configuration options and can be used as a reference
for creating a configuration.
@@ -62,7 +62,7 @@ for more details.
The (empty-by-default)
[example config](https://github.com/kubernetes-sigs/node-feature-discovery/blob/{{site.release}}/deployment/components/worker-config/nfd-worker.conf.example)
contains all available configuration options and can be used as a reference
for creating a configuration.

Configuration options can also be specified via the `-options` command line
flag, in which case no mounts need to be used. The same format as in the config
@@ -18,6 +18,8 @@ package topologyupdater

import (
	"fmt"
	"os"
	"path/filepath"
	"time"

	"k8s.io/klog/v2"

@@ -32,6 +34,7 @@ import (
	pb "sigs.k8s.io/node-feature-discovery/pkg/topologyupdater"
	"sigs.k8s.io/node-feature-discovery/pkg/utils"
	"sigs.k8s.io/node-feature-discovery/pkg/version"
	"sigs.k8s.io/yaml"
)

// Args are the command line arguments
@@ -40,6 +43,12 @@ type Args struct {
	NoPublish      bool
	Oneshot        bool
	KubeConfigFile string
	ConfigFile     string
}

// NFDConfig contains the configuration settings of NFDTopologyUpdater.
type NFDConfig struct {
	ExcludeList map[string][]string
}

type NfdTopologyUpdater interface {
@@ -59,6 +68,8 @@ type nfdTopologyUpdater struct {
	certWatch      *utils.FsWatcher
	client         pb.NodeTopologyClient
	stop           chan struct{} // channel for signaling stop
	configFilePath string
	config         *NFDConfig
}

// NewTopologyUpdater creates a new NfdTopologyUpdater instance.
@@ -75,7 +86,11 @@ func NewTopologyUpdater(args Args, resourcemonitorArgs resourcemonitor.Args, pol
		nodeInfo: &staticNodeInfo{
			tmPolicy: policy,
		},
		stop:   make(chan struct{}, 1),
		config: &NFDConfig{},
	}
	if args.ConfigFile != "" {
		nfd.configFilePath = filepath.Clean(args.ConfigFile)
	}
	return nfd, nil
}

@@ -99,6 +114,9 @@ func (w *nfdTopologyUpdater) Run() error {
		}
		kubeApihelper = apihelper.K8sHelpers{Kubeconfig: kubeconfig}
	}
	if err := w.configure(); err != nil {
		return fmt.Errorf("failed to configure Node Feature Discovery Topology Updater: %w", err)
	}

	var resScan resourcemonitor.ResourcesScanner

@@ -113,7 +131,8 @@ func (w *nfdTopologyUpdater) Run() error {
	// zonesChannel := make(chan v1alpha1.ZoneList)
	var zones v1alpha1.ZoneList

	excludeList := resourcemonitor.NewExcludeResourceList(w.config.ExcludeList, nfdclient.NodeName())
	resAggr, err := resourcemonitor.NewResourcesAggregator(podResClient, excludeList)
	if err != nil {
		return fmt.Errorf("failed to obtain node resource information: %w", err)
	}

@@ -245,3 +264,27 @@ func advertiseNodeTopology(client pb.NodeTopologyClient, zoneInfo v1alpha1.ZoneL

	return nil
}

// configure reads the (optional) configuration file and unmarshals it into w.config.
func (w *nfdTopologyUpdater) configure() error {
	if w.configFilePath == "" {
		klog.Warningf("file path for nfd-topology-updater conf file is empty")
		return nil
	}

	b, err := os.ReadFile(w.configFilePath)
	if err != nil {
		// config is optional
		if os.IsNotExist(err) {
			klog.Warningf("couldn't find conf file under %v", w.configFilePath)
			return nil
		}
		return err
	}

	err = yaml.Unmarshal(b, w.config)
	if err != nil {
		return fmt.Errorf("failed to parse configuration file %q: %w", w.configFilePath, err)
	}
	klog.Infof("configuration file %q parsed:\n %v", w.configFilePath, w.config)
	return nil
}
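
Note that `NFDConfig` carries no field tags: `sigs.k8s.io/yaml` converts the YAML to JSON and hands it to `encoding/json`, whose field matching is case-insensitive, so the `excludeList` key lands in `ExcludeList`. A minimal, self-contained sketch of this parsing step (hypothetical `main` package, not part of the PR):

```go
package main

import (
	"fmt"

	"sigs.k8s.io/yaml"
)

// NFDConfig mirrors the struct above; encoding/json matches the
// "excludeList" YAML key to the ExcludeList field case-insensitively.
type NFDConfig struct {
	ExcludeList map[string][]string
}

func main() {
	raw := []byte("excludeList:\n  '*': [hugepages-2Mi]\n  node1: [cpu]\n")
	var c NFDConfig
	if err := yaml.Unmarshal(raw, &c); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", c) // {ExcludeList:map[*:[hugepages-2Mi] node1:[cpu]]}
}
```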
pkg/resourcemonitor/excludelist.go (new file, 33 lines)
@@ -0,0 +1,33 @@
package resourcemonitor

import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/klog/v2"
)

// ExcludeResourceList contains a list of resources to ignore during the resources scan
type ExcludeResourceList struct {
	excludeList sets.String
}

// NewExcludeResourceList returns a new ExcludeResourceList holding the entries
// that apply to the given node (its own key plus the "*" wildcard) as a sets.String.
func NewExcludeResourceList(resMap map[string][]string, nodeName string) ExcludeResourceList {
	excludeList := make(sets.String)
	for k, v := range resMap {
		if k == nodeName || k == "*" {
			excludeList.Insert(v...)
		}
	}
	return ExcludeResourceList{
		excludeList: excludeList,
	}
}

// IsExcluded reports whether the given resource is in the exclude list.
func (rl *ExcludeResourceList) IsExcluded(resource corev1.ResourceName) bool {
	if rl.excludeList.Has(string(resource)) {
		klog.V(5).InfoS("resource excluded", "resource", resource)
		return true
	}
	return false
}
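
To make the merge semantics concrete: entries under the node's own key and under the `*` wildcard are unioned into one set. A small hypothetical usage snippet (not part of the PR), with resource names borrowed from the example config:

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"

	"sigs.k8s.io/node-feature-discovery/pkg/resourcemonitor"
)

func main() {
	cfg := map[string][]string{
		"*":     {"hugepages-2Mi"},      // excluded on every node
		"node1": {"cpu", "vendor/nic1"}, // excluded on node1 only
	}
	el := resourcemonitor.NewExcludeResourceList(cfg, "node1")
	for _, r := range []string{"cpu", "memory", "hugepages-2Mi"} {
		fmt.Printf("%s excluded: %v\n", r, el.IsExcluded(corev1.ResourceName(r)))
	}
	// Output:
	// cpu excluded: true
	// memory excluded: false
	// hugepages-2Mi excluded: true
}
```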
pkg/resourcemonitor/excludelist_test.go (new file, 70 lines)
@@ -0,0 +1,70 @@
package resourcemonitor

import (
	"testing"

	corev1 "k8s.io/api/core/v1"
)

const (
	cpu             = string(corev1.ResourceCPU)
	memory          = string(corev1.ResourceMemory)
	hugepages2Mi    = "hugepages-2Mi"
	nicResourceName = "vendor/nic1"
)

func TestNewExcludeResourceList(t *testing.T) {
	tests := []struct {
		desc                      string
		excludeListConfig         map[string][]string
		nodeName                  string
		expectedExcludedResources []string
	}{
		{
			desc: "exclude list with multiple nodes",
			excludeListConfig: map[string][]string{
				"node1": {
					cpu,
					nicResourceName,
				},
				"node2": {
					memory,
					hugepages2Mi,
				},
			},
			nodeName:                  "node1",
			expectedExcludedResources: []string{cpu, nicResourceName},
		},
		{
			desc: "exclude list with wild card",
			excludeListConfig: map[string][]string{
				"*": {
					memory, nicResourceName,
				},
				"node1": {
					cpu,
					hugepages2Mi,
				},
			},
			nodeName:                  "node2",
			expectedExcludedResources: []string{memory, nicResourceName},
		},
		{
			desc:                      "empty exclude list",
			excludeListConfig:         map[string][]string{},
			nodeName:                  "node1",
			expectedExcludedResources: []string{},
		},
	}

	for _, tt := range tests {
		t.Logf("test %s", tt.desc)
		excludeList := NewExcludeResourceList(tt.excludeListConfig, tt.nodeName)
		for _, res := range tt.expectedExcludedResources {
			if !excludeList.IsExcluded(corev1.ResourceName(res)) {
				t.Errorf("resource: %q expected to be excluded from node: %q", res, tt.nodeName)
			}
		}
	}
}
@@ -28,8 +28,8 @@ import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/klog/v2"

	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
	"sigs.k8s.io/node-feature-discovery/pkg/utils"
	"sigs.k8s.io/node-feature-discovery/pkg/utils/hostpath"
)
@@ -46,6 +46,7 @@ type nodeResources struct {
	topo                           *ghw.TopologyInfo
	reservedCPUIDPerNUMA           map[int][]string
	memoryResourcesCapacityPerNUMA utils.NumaMemoryResources
	excludeList                    ExcludeResourceList
}

type resourceData struct {
@@ -54,7 +55,7 @@ type resourceData struct {
	capacity    int64
}

func NewResourcesAggregator(podResourceClient podresourcesapi.PodResourcesListerClient, excludeList ExcludeResourceList) (ResourcesAggregator, error) {
	var err error

	topo, err := ghw.Topology(ghw.WithPathOverrides(ghw.PathOverrides{

@@ -85,11 +86,11 @@ func NewResourcesAggregator(podResourceClient podresourcesapi.PodResourcesLister
		return nil, fmt.Errorf("failed to get allocatable resources (ensure that KubeletPodResourcesGetAllocatable feature gate is enabled): %w", err)
	}

	return NewResourcesAggregatorFromData(topo, resp, memoryResourcesCapacityPerNUMA, excludeList), nil
}

// NewResourcesAggregatorFromData is used to aggregate resource information based on the received data from the underlying hardware and podresources API
func NewResourcesAggregatorFromData(topo *ghw.TopologyInfo, resp *podresourcesapi.AllocatableResourcesResponse, memoryResourceCapacity utils.NumaMemoryResources, excludeList ExcludeResourceList) ResourcesAggregator {
	allDevs := getContainerDevicesFromAllocatableResources(resp, topo)
	return &nodeResources{
		topo: topo,
@@ -97,6 +98,7 @@ func NewResourcesAggregatorFromData(topo *ghw.TopologyInfo, resp *podresourcesap
		perNUMAAllocatable:             makeNodeAllocatable(allDevs, resp.GetMemory()),
		reservedCPUIDPerNUMA:           makeReservedCPUMap(topo.Nodes, allDevs),
		memoryResourcesCapacityPerNUMA: memoryResourceCapacity,
		excludeList:                    excludeList,
	}
}

@@ -108,6 +110,9 @@ func (noderesourceData *nodeResources) Aggregate(podResData []PodResources) topo
	if ok {
		perNuma[nodeID] = make(map[corev1.ResourceName]*resourceData)
		for resName, allocatable := range nodeRes {
			// skip resources excluded for this node via the excludeList configuration
			if noderesourceData.excludeList.IsExcluded(resName) {
				continue
			}
			switch {
			case resName == "cpu":
				perNuma[nodeID][resName] = &resourceData{
@@ -178,7 +178,7 @@ func TestResourcesAggregator(t *testing.T) {
				corev1.ResourceName("hugepages-2Mi"): 2048,
			},
		}
		resAggr = NewResourcesAggregatorFromData(&fakeTopo, availRes, memoryResourcesCapacity, NewExcludeResourceList(map[string][]string{}, ""))

		Convey("When aggregating resources", func() {
			expected := topologyv1alpha1.ZoneList{

@@ -376,7 +376,7 @@ func TestResourcesAggregator(t *testing.T) {
			},
		}

		resAggr = NewResourcesAggregatorFromData(&fakeTopo, availRes, memoryResourcesCapacity, NewExcludeResourceList(map[string][]string{}, ""))

		Convey("When aggregating resources", func() {
			podRes := []PodResources{
@@ -27,6 +27,7 @@ import (
	"github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
	topologyclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	extclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

@@ -42,16 +43,17 @@ import (

var _ = SIGDescribe("Node Feature Discovery topology updater", func() {
	var (
		extClient                *extclient.Clientset
		topologyClient           *topologyclientset.Clientset
		topologyUpdaterNode      *corev1.Node
		topologyUpdaterDaemonSet *appsv1.DaemonSet
		workerNodes              []corev1.Node
		kubeletConfig            *kubeletconfig.KubeletConfiguration
	)

	f := framework.NewDefaultFramework("node-topology-updater")
	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
	JustBeforeEach(func() {
		var err error

		if extClient == nil {

@@ -64,18 +66,10 @@ var _ = SIGDescribe("Node Feature Discovery topology updater", func() {
			Expect(err).NotTo(HaveOccurred())
		}

		By("Creating the node resource topologies CRD")
		Expect(testutils.CreateNodeResourceTopologies(extClient)).ToNot(BeNil())

		Expect(testutils.ConfigureRBAC(f.ClientSet, f.Namespace.Name)).NotTo(HaveOccurred())

		image := fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag)
		f.PodClient().CreateSync(testutils.NFDMasterPod(image, false))

@@ -88,7 +82,6 @@ var _ = SIGDescribe("Node Feature Discovery topology updater", func() {
		Expect(e2enetwork.WaitForService(f.ClientSet, f.Namespace.Name, masterService.Name, true, time.Second, 10*time.Second)).NotTo(HaveOccurred())

		By("Creating nfd-topology-updater daemonset")
		topologyUpdaterDaemonSet, err = f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Create(context.TODO(), topologyUpdaterDaemonSet, metav1.CreateOptions{})
		Expect(err).NotTo(HaveOccurred())

@@ -110,7 +103,25 @@ var _ = SIGDescribe("Node Feature Discovery topology updater", func() {
		Expect(err).NotTo(HaveOccurred())
	})

	// TODO: replace with regular AfterEach once we have https://github.com/kubernetes/kubernetes/pull/111998 in
	f.AddAfterEach("Node Feature Discovery topology updater CRD and RBAC removal", func(f *framework.Framework, failed bool) {
		err := testutils.DeconfigureRBAC(f.ClientSet, f.Namespace.Name)
		if err != nil {
			framework.Logf("failed to delete RBAC resources: %v", err)
		}
	})

	Context("with single nfd-master pod", func() {
		BeforeEach(func() {
			cfg, err := testutils.GetConfig()
			Expect(err).ToNot(HaveOccurred())

			kcfg := cfg.GetKubeletConfig()
			By(fmt.Sprintf("Using config (%#v)", kcfg))

			topologyUpdaterDaemonSet = testutils.NFDTopologyUpdaterDaemonSet(kcfg, fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag), []string{})
		})

		It("should fill the node resource topologies CR with the data", func() {
			nodeTopology := testutils.GetNodeTopology(topologyClient, topologyUpdaterNode.Name)
			isValid := testutils.IsValidNodeTopology(nodeTopology, kubeletConfig)

@@ -247,11 +258,50 @@ var _ = SIGDescribe("Node Feature Discovery topology updater", func() {

	})

	When("topology-updater is configured to exclude memory", func() {
		var topologyUpdaterConfigMap *corev1.ConfigMap

		BeforeEach(func() {
			data := make(map[string]string)
			data["nfd-topology-updater.conf"] = `excludeList:
  '*': [memory]
`
			topologyUpdaterConfigMap = &corev1.ConfigMap{
				ObjectMeta: metav1.ObjectMeta{
					Name: "nfd-topology-updater-conf",
				},
				Data: data,
			}

			cm, err := f.ClientSet.CoreV1().ConfigMaps(f.Namespace.Name).Create(context.TODO(), topologyUpdaterConfigMap, metav1.CreateOptions{})
			Expect(err).ToNot(HaveOccurred())

			cfg, err := testutils.GetConfig()
			Expect(err).ToNot(HaveOccurred())

			kcfg := cfg.GetKubeletConfig()
			By(fmt.Sprintf("Using config (%#v)", kcfg))

			opts := testutils.SpecWithConfigMap(cm.Name, cm.Name, "/etc/kubernetes/node-feature-discovery")
			topologyUpdaterDaemonSet = testutils.NFDTopologyUpdaterDaemonSet(kcfg, fmt.Sprintf("%s:%s", *dockerRepo, *dockerTag), []string{}, opts)
		})

		It("noderesourcetopology should not advertise the memory resource", func() {
			Eventually(func() bool {
				memoryFound := false
				nodeTopology := testutils.GetNodeTopology(topologyClient, topologyUpdaterNode.Name)
				for _, zone := range nodeTopology.Zones {
					for _, res := range zone.Resources {
						if res.Name == string(corev1.ResourceMemory) {
							memoryFound = true
							framework.Logf("resource:%s was found for nodeTopology:%s on zone:%s while it should not", corev1.ResourceMemory, nodeTopology.Name, zone.Name)
							break
						}
					}
				}
				return memoryFound
			}, 1*time.Minute, 10*time.Second).Should(BeFalse())
		})
	})
})
|
|
@ -176,11 +176,37 @@ func NFDWorkerDaemonSet(image string, extraArgs []string) *appsv1.DaemonSet {
|
|||
}
|
||||
|
||||
// NFDTopologyUpdaterDaemonSet provides the NFD daemon set topology updater
|
||||
func NFDTopologyUpdaterDaemonSet(kc KubeletConfig, image string, extraArgs []string) *appsv1.DaemonSet {
|
||||
func NFDTopologyUpdaterDaemonSet(kc KubeletConfig, image string, extraArgs []string, options ...func(spec *corev1.PodSpec)) *appsv1.DaemonSet {
|
||||
podSpec := nfdTopologyUpdaterPodSpec(kc, image, extraArgs)
|
||||
for _, o := range options {
|
||||
o(podSpec)
|
||||
}
|
||||
return newDaemonSet("nfd-topology-updater", podSpec)
|
||||
}
|
||||
|
||||
func SpecWithConfigMap(cmName, volumeName, mountPath string) func(spec *corev1.PodSpec) {
|
||||
return func(spec *corev1.PodSpec) {
|
||||
spec.Volumes = append(spec.Volumes,
|
||||
corev1.Volume{
|
||||
Name: volumeName,
|
||||
VolumeSource: corev1.VolumeSource{
|
||||
ConfigMap: &corev1.ConfigMapVolumeSource{
|
||||
LocalObjectReference: corev1.LocalObjectReference{
|
||||
Name: cmName,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
cnt := &spec.Containers[0]
|
||||
cnt.VolumeMounts = append(cnt.VolumeMounts,
|
||||
corev1.VolumeMount{
|
||||
Name: volumeName,
|
||||
ReadOnly: true,
|
||||
MountPath: mountPath,
|
||||
})
|
||||
}
|
||||
}
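
`SpecWithConfigMap` is a functional option for the pod spec, and `NFDTopologyUpdaterDaemonSet` now applies any number of such options before building the daemon set. A sketch of composing the two, mirroring the e2e test above (`kcfg` and `image` assumed to be in scope):

```go
// Mount a ConfigMap named "nfd-topology-updater-conf" into the updater pod,
// then build the daemon set with that extra pod-spec option applied.
opts := testutils.SpecWithConfigMap("nfd-topology-updater-conf",
	"nfd-topology-updater-conf", "/etc/kubernetes/node-feature-discovery")
ds := testutils.NFDTopologyUpdaterDaemonSet(kcfg, image, []string{}, opts)
```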
// newDaemonSet provides a new daemon set with the given pod spec
func newDaemonSet(name string, podSpec *corev1.PodSpec) *appsv1.DaemonSet {
	return &appsv1.DaemonSet{