1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2024-12-14 11:57:51 +00:00

Drop dynamic run-time reconfiguration

Simplify the code and reduce possible error scenarios by dropping
fsnotify-based reconfiguration from nfd-master and nfd-worker. Also
eliminates repeated re-configuration in scenarios where kubelet
continuously touches (every minute) the mounted file (configmap) on the
filesystem.

Also modifies the Helm and kustomize deployments so that nfd-master,
nfd-worker and nfd-topology-updater pods are restarted on configmap
updates. In kustomize, the slight downside of this is that the name of the
config map(s) depends on the content, so every time a user customizes
the config data, the old unused configmap will be left and must be
garbage-collected manually.
This commit is contained in:
Markus Lehtonen 2024-08-19 16:11:09 +03:00
parent 4db3216a80
commit 02b6b7395c
15 changed files with 35 additions and 350 deletions

View file

@ -1,9 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- files:
- nfd-master.conf=nfd-master.conf.example

View file

@ -1,9 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- files:
- nfd-topology-updater.conf=nfd-topology-updater.conf.example

View file

@ -1,9 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- files:
- nfd-worker.conf=nfd-worker.conf.example

View file

@ -23,10 +23,11 @@ spec:
labels:
{{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
role: master
{{- with .Values.master.annotations }}
annotations:
checksum/config: {{ include (print $.Template.BasePath "/nfd-master-conf.yaml") . | sha256sum }}
{{- with .Values.master.annotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
spec:
{{- with .Values.priorityClassName }}
priorityClassName: {{ . }}

View file

@ -22,10 +22,11 @@ spec:
labels:
{{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
role: topology-updater
{{- with .Values.topologyUpdater.annotations }}
annotations:
checksum/config: {{ include (print $.Template.BasePath "/nfd-topologyupdater-conf.yaml") . | sha256sum }}
{{- with .Values.topologyUpdater.annotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
spec:
serviceAccountName: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
dnsPolicy: ClusterFirstWithHostNet

View file

@ -22,10 +22,11 @@ spec:
labels:
{{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
role: worker
{{- with .Values.worker.annotations }}
annotations:
checksum/config: {{ include (print $.Template.BasePath "/nfd-worker-conf.yaml") . | sha256sum }}
{{- with .Values.worker.annotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
spec:
dnsPolicy: ClusterFirstWithHostNet
{{- with .Values.priorityClassName }}

View file

@ -151,8 +151,7 @@ core:
### core.klog
The following options specify the logger configuration. Most of which can be
dynamically adjusted at run-time.
The following options specify the logger configuration.
> **NOTE:** The logger options can also be specified via command line flags
> which take precedence over any corresponding config file options.
@ -163,40 +162,30 @@ If true, adds the file directory to the header of the log messages.
Default: `false`
Run-time configurable: yes
#### core.klog.alsologtostderr
Log to standard error as well as files.
Default: `false`
Run-time configurable: yes
#### core.klog.logBacktraceAt
When logging hits line file:N, emit a stack trace.
Default: *empty*
Run-time configurable: yes
#### core.klog.logDir
If non-empty, write log files in this directory.
Default: *empty*
Run-time configurable: no
#### core.klog.logFile
If non-empty, use this log file.
Default: *empty*
Run-time configurable: no
#### core.klog.logFileMaxSize
Defines the maximum size a log file can grow to. Unit is megabytes. If the
@ -204,54 +193,40 @@ value is 0, the maximum file size is unlimited.
Default: `1800`
Run-time configurable: no
#### core.klog.logtostderr
Log to standard error instead of files
Default: `true`
Run-time configurable: yes
#### core.klog.skipHeaders
If true, avoid header prefixes in the log messages.
Default: `false`
Run-time configurable: yes
#### core.klog.skipLogHeaders
If true, avoid headers when opening log files.
Default: `false`
Run-time configurable: no
#### core.klog.stderrthreshold
Logs at or above this threshold go to stderr (default 2)
Run-time configurable: yes
#### core.klog.v
Number for the log level verbosity.
Default: `0`
Run-time configurable: yes
#### core.klog.vmodule
Comma-separated list of `pattern=N` settings for file-filtered logging.
Default: *empty*
Run-time configurable: yes
## sources
The `sources` section contains feature source specific configuration parameters.

View file

@ -32,29 +32,28 @@ received from nfd-worker instances through
## Master configuration
NFD-Master supports dynamic configuration through a configuration file. The
NFD-Master supports configuration through a configuration file. The
default location is `/etc/kubernetes/node-feature-discovery/nfd-master.conf`,
but this can be changed by specifying the `-config` command line flag.
Configuration file is re-read whenever it is modified which makes run-time
re-configuration of nfd-master straightforward.
Master configuration file is read inside the container, and thus, Volumes and
VolumeMounts are needed to make your configuration available for NFD. The
preferred method is to use a ConfigMap which provides easy deployment and
re-configurability.
The provided nfd-master deployment templates create an empty configmap and
mount it inside the nfd-master containers. In kustomize deployments,
configuration can be edited with:
```bash
kubectl -n ${NFD_NS} edit configmap nfd-master-conf
```
The provided deployment methods (Helm and Kustomize) create an empty configmap
and mount it inside the nfd-master containers.
In Helm deployments,
[Master pod parameter](../deployment/helm.md#master-pod-parameters)
`master.config` can be used to edit the respective configuration.
In Kustomize deployments, modify the `nfd-master-conf` ConfigMap with a custom
overlay.
> **NOTE:** dynamic run-time reconfiguration was dropped in NFD v0.17.
> Re-configuration is handled by pod restarts.
See
[nfd-master configuration file reference](../reference/master-configuration-reference.md)
for more details.

View file

@ -62,27 +62,22 @@ NFD-Topology-Updater supports configuration through a configuration file. The
default location is `/etc/kubernetes/node-feature-discovery/topology-updater.conf`,
but this can be changed by specifying the `-config` command line flag.
> **NOTE:** unlike nfd-worker, dynamic configuration updates are not supported.
Topology-Updater configuration file is read inside the container,
and thus, Volumes and VolumeMounts are needed
to make your configuration available for NFD.
The preferred method is to use a ConfigMap
which provides easy deployment and re-configurability.
The provided nfd-topology-updater deployment templates
create an empty configmap
The provided deployment templates create an empty configmap
and mount it inside the nfd-topology-updater containers.
In kustomize deployments, configuration can be edited with:
```bash
kubectl -n ${NFD_NS} edit configmap nfd-topology-updater-conf
```
In Helm deployments,
[Topology Updater parameters](../deployment/helm.md#topology-updater-parameters)
`topologyUpdater.config` can be used to edit the respective configuration.
In Kustomize deployments, modify the `nfd-topology-updater-conf` ConfigMap with a custom
overlay.
See
[nfd-topology-updater configuration file reference](../reference/topology-updater-configuration-reference.md)
for more details.

View file

@ -19,13 +19,9 @@ This can be changed by using the
[`core.sleepInterval`](../reference/worker-configuration-reference.md#coresleepinterval)
config option.
The worker configuration file is watched and re-read on every change which
provides a mechanism of dynamic run-time reconfiguration. See
[worker configuration](#worker-configuration) for more details.
## Worker configuration
NFD-Worker supports dynamic configuration through a configuration file. The
NFD-Worker supports configuration through a configuration file. The
default location is `/etc/kubernetes/node-feature-discovery/nfd-worker.conf`,
but this can be changed by specifying the `-config` command line flag.
Configuration file is re-read whenever it is modified which makes run-time
@ -36,18 +32,19 @@ VolumeMounts are needed to make your configuration available for NFD. The
preferred method is to use a ConfigMap which provides easy deployment and
re-configurability.
The provided nfd-worker deployment templates create an empty configmap and
mount it inside the nfd-worker containers. In kustomize deployments,
configuration can be edited with:
```bash
kubectl -n ${NFD_NS} edit configmap nfd-worker-conf
```
The provided deployment methods (Helm and Kustomize) create an empty configmap
and mount it inside the nfd-worker containers.
In Helm deployments,
[Worker pod parameter](../deployment/helm.md#worker-pod-parameters)
`worker.config` can be used to edit the respective configuration.
In Kustomize deployments, modify the `nfd-worker-conf` ConfigMap with a custom
overlay.
> **NOTE:** dynamic run-time reconfiguration was dropped in NFD v0.17.
> Re-configuration is handled by pod restarts.
See
[nfd-worker configuration file reference](../reference/worker-configuration-reference.md)
for more details.

View file

@ -21,7 +21,6 @@ import (
"fmt"
"maps"
"os"
"path/filepath"
"sort"
"strings"
"testing"
@ -665,96 +664,6 @@ leaderElection:
})
}
// TestDynamicConfig exercises run-time reconfiguration of nfd-master. It
// starts a fake master from a config file and then rewrites, removes and
// re-creates that file, checking master.config after each step.
func TestDynamicConfig(t *testing.T) {
Convey("When running nfd-master", t, func() {
// Add feature gates as running nfd-master depends on that
err := features.NFDMutableFeatureGate.Add(features.DefaultNFDFeatureGates)
So(err, ShouldBeNil)
tmpDir, err := os.MkdirTemp("", "*.nfd-test")
So(err, ShouldBeNil)
defer os.RemoveAll(tmpDir)
// Create (temporary) dir for config. Note that the directory itself is
// named "master.conf"; the config file is created inside it under the
// same name.
configDir := filepath.Join(tmpDir, "subdir-1", "subdir-2", "master.conf")
err = os.MkdirAll(configDir, 0755)
So(err, ShouldBeNil)
// Create config file
configFile := filepath.Clean(filepath.Join(configDir, "master.conf"))
// writeConfig (re)creates the config file with the given content and
// asserts that every file operation succeeded.
writeConfig := func(data string) {
f, err := os.Create(configFile)
So(err, ShouldBeNil)
_, err = f.WriteString(data)
So(err, ShouldBeNil)
err = f.Close()
So(err, ShouldBeNil)
}
// Initial configuration that the master is started with.
writeConfig(`
klog:
v: "4"
extraLabelNs: ["added.ns.io"]
`)
master := newFakeMaster(
WithArgs(&Args{ConfigFile: configFile}),
WithKubernetesClient(fakeclient.NewSimpleClientset(newTestNode())))
Convey("config file updates should take effect", func() {
// Run the master in the background; it is stopped by the deferred
// master.Stop() below, after which Run is expected to return nil.
go func() {
Convey("nfd-master should exit gracefully", t, func() {
err = master.Run()
So(err, ShouldBeNil)
})
}()
defer master.Stop()
// Check initial config
// NOTE(review): the fixed 10s sleep presumably gives master.Run() time
// to load the configuration before the first check - confirm.
time.Sleep(10 * time.Second)
// Each check below hands So a getter closure plus withTimeout with a
// 2-second duration and the assertion to apply to the getter's value.
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"added.ns.io": struct{}{}})
// Update config and verify the effect
writeConfig(`
extraLabelNs: ["override.ns.io"]
resyncPeriod: '2h'
nfdApiParallelism: 300
`)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"override.ns.io": struct{}{}})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(2)*time.Hour)
So(func() interface{} { return master.config.NfdApiParallelism },
withTimeout, 2*time.Second, ShouldResemble, 300)
// Removing config file should get back our defaults
// (the whole temporary tree is removed, not just the file)
err = os.RemoveAll(tmpDir)
So(err, ShouldBeNil)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(1)*time.Hour)
So(func() interface{} { return master.config.NfdApiParallelism },
withTimeout, 2*time.Second, ShouldResemble, 10)
// Re-creating config dir and file should change the config
err = os.MkdirAll(configDir, 0755)
So(err, ShouldBeNil)
writeConfig(`
extraLabelNs: ["another.override.ns"]
resyncPeriod: '3m'
nfdApiParallelism: 100
`)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"another.override.ns": struct{}{}})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(3)*time.Minute)
So(func() interface{} { return master.config.NfdApiParallelism },
withTimeout, 2*time.Second, ShouldResemble, 100)
})
})
}
func newTestNodeList() *corev1.NodeList {
l := corev1.NodeList{}

View file

@ -284,7 +284,7 @@ func (m *nfdMaster) Run() error {
klog.InfoS("Master instance", "instance", m.args.Instance)
}
// Read initial configuration
// Read configuration
if err := m.configure(m.configFilePath, m.args.Options); err != nil {
return err
}
@ -302,12 +302,6 @@ func (m *nfdMaster) Run() error {
m.updaterPool.start(m.config.NfdApiParallelism)
// Create watcher for config file
configWatch, err := utils.CreateFsWatcher(time.Second, m.configFilePath)
if err != nil {
return err
}
if !m.config.NoPublish {
err := m.updateMasterNode()
if err != nil {
@ -365,36 +359,6 @@ func (m *nfdMaster) Run() error {
case err := <-grpcErr:
return fmt.Errorf("error in serving gRPC: %w", err)
case <-configWatch.Events:
klog.InfoS("reloading configuration")
if err := m.configure(m.configFilePath, m.args.Options); err != nil {
return err
}
// Stop the updaterPool so that no node updates are underway
// while we reconfigure the NFD API controller (including the
// listers) below
m.updaterPool.stop()
// restart NFD API controller
if m.nfdController != nil {
klog.InfoS("stopping the nfd api controller")
m.nfdController.stop()
}
if m.args.CrdController {
err := m.startNfdApiController()
if err != nil {
return nil
}
}
// Restart the updaterPool
m.updaterPool.start(m.config.NfdApiParallelism)
// Update all nodes when the configuration changes
if m.nfdController != nil && nfdfeatures.NFDFeatureGate.Enabled(nfdfeatures.NodeFeatureAPI) {
m.nfdController.updateAllNodes()
}
case <-m.stop:
klog.InfoS("shutting down nfd-master")
return nil

View file

@ -18,7 +18,6 @@ package nfdworker
import (
"os"
"path/filepath"
"regexp"
"strings"
"testing"
@ -195,113 +194,6 @@ sources:
})
}
// TestDynamicConfig exercises run-time reconfiguration of nfd-worker. It
// starts a worker from a config file and then rewrites, removes and
// re-creates that file, polling worker.config for the expected label
// whitelist after each step.
func TestDynamicConfig(t *testing.T) {
Convey("When running nfd-worker", t, func() {
tmpDir, err := os.MkdirTemp("", "*.nfd-test")
So(err, ShouldBeNil)
defer os.RemoveAll(tmpDir)
// Create (temporary) dir for config. Note that the directory itself is
// named "worker.conf"; the config file is created inside it under the
// same name.
configDir := filepath.Join(tmpDir, "subdir-1", "subdir-2", "worker.conf")
err = os.MkdirAll(configDir, 0755)
So(err, ShouldBeNil)
// Create config file
configFile := filepath.Join(configDir, "worker.conf")
// writeConfig (re)creates the config file with the given content and
// asserts that every file operation succeeded.
writeConfig := func(data string) {
f, err := os.Create(configFile)
So(err, ShouldBeNil)
_, err = f.WriteString(data)
So(err, ShouldBeNil)
err = f.Close()
So(err, ShouldBeNil)
}
// Initial configuration that the worker is started with.
writeConfig(`
core:
labelWhiteList: "fake"
`)
// Restrict the worker to the "fake" source and override NoPublish to true.
noPublish := true
w, err := NewNfdWorker(WithArgs(&Args{
ConfigFile: configFile,
Overrides: ConfigOverrideArgs{
FeatureSources: &utils.StringSliceVal{"fake"},
LabelSources: &utils.StringSliceVal{"fake"},
NoPublish: &noPublish},
}), WithKubernetesClient(fakeclient.NewSimpleClientset()))
So(err, ShouldBeNil)
// Access the concrete type so that the test can read worker.config.
worker := w.(*nfdWorker)
Convey("config file updates should take effect", func() {
// Run the worker in the background; its return value is deliberately
// ignored and the worker is stopped when this Convey block returns.
go func() { _ = w.Run() }()
defer w.Stop()
// Check initial config
So(func() interface{} { return worker.config.Core.LabelWhiteList.String() },
withTimeout, 2*time.Second, ShouldEqual, "fake")
// Update config and verify the effect
writeConfig(`
core:
labelWhiteList: "foo"
`)
So(func() interface{} { return worker.config.Core.LabelWhiteList.String() },
withTimeout, 2*time.Second, ShouldEqual, "foo")
// Removing config file should get back our defaults
// (the whole temporary tree is removed, not just the file)
err = os.RemoveAll(tmpDir)
So(err, ShouldBeNil)
So(func() interface{} { return worker.config.Core.LabelWhiteList.String() },
withTimeout, 2*time.Second, ShouldEqual, "")
// Re-creating config dir and file should change the config
err = os.MkdirAll(configDir, 0755)
So(err, ShouldBeNil)
writeConfig(`
core:
labelWhiteList: "bar"
`)
So(func() interface{} { return worker.config.Core.LabelWhiteList.String() },
withTimeout, 2*time.Second, ShouldEqual, "bar")
})
})
}
// withTimeout is a custom assertion for polling a value asynchronously
// actual is a function for getting the actual value
// expected[0] is a time.Duration value specifying the timeout
// expected[1] is the "real" assertion function to be called
// expected[2:] are the arguments for the "real" assertion function
func withTimeout(actual interface{}, expected ...interface{}) string {
getter, ok := actual.(func() interface{})
if !ok {
return "not getterFunc"
}
t, ok := expected[0].(time.Duration)
if !ok {
return "not time.Duration"
}
f, ok := expected[1].(func(interface{}, ...interface{}) string)
if !ok {
return "not an assert func"
}
timeout := time.After(t)
for {
result := f(getter(), expected[2:]...)
if result == "" {
return ""
}
select {
case <-timeout:
return result
case <-time.After(10 * time.Millisecond):
}
}
}
func TestNewNfdWorker(t *testing.T) {
Convey("When creating new NfdWorker instance", t, func() {

View file

@ -297,14 +297,11 @@ func (w *nfdWorker) runFeatureDiscovery() error {
func (w *nfdWorker) Run() error {
klog.InfoS("Node Feature Discovery Worker", "version", version.Get(), "nodeName", utils.NodeName(), "namespace", w.kubernetesNamespace)
// Create watcher for config file and read initial configuration
configWatch, err := utils.CreateFsWatcher(time.Second, w.configFilePath)
// Read configuration file
err := w.configure(w.configFilePath, w.args.Options)
if err != nil {
return err
}
if err := w.configure(w.configFilePath, w.args.Options); err != nil {
return err
}
// Create watcher for TLS certificates
w.certWatch, err = utils.CreateFsWatcher(time.Second, w.args.CaFile, w.args.CertFile, w.args.KeyFile)
@ -390,24 +387,6 @@ func (w *nfdWorker) Run() error {
return err
}
case <-configWatch.Events:
klog.InfoS("reloading configuration")
if err := w.configure(w.configFilePath, w.args.Options); err != nil {
return err
}
// Manage connection to master
if w.config.Core.NoPublish || !features.NFDFeatureGate.Enabled(features.NodeFeatureAPI) {
w.grpcDisconnect()
}
// Always re-label after a re-config event. This way the new config
// comes into effect even if the sleep interval is long (or infinite)
labelTrigger.Reset(w.config.Core.SleepInterval.Duration)
err = w.runFeatureDiscovery()
if err != nil {
return err
}
case <-w.certWatch.Events:
klog.InfoS("TLS certificate update, renewing connection to nfd-master")
w.grpcDisconnect()
@ -417,7 +396,6 @@ func (w *nfdWorker) Run() error {
if w.healthServer != nil {
w.healthServer.GracefulStop()
}
configWatch.Close()
w.certWatch.Close()
return nil
}

View file

@ -940,24 +940,6 @@ denyLabelNs: ["*.denied.ns","random.unwanted.ns"]
By("Deleting NodeFeature object")
err = nfdClient.NfdV1alpha1().NodeFeatures(f.Namespace.Name).Delete(ctx, nodeFeatures[0], metav1.DeleteOptions{})
Expect(err).NotTo(HaveOccurred())
// TODO: Find a better way to handle the timeout that happens to reflect the configmap changes
Skip("Testing the master dynamic configuration")
// Verify that config changes were applied
By("Updating the master config")
Expect(testutils.UpdateConfigMap(ctx, f.ClientSet, "nfd-master-conf", f.Namespace.Name, "nfd-master.conf", `
denyLabelNs: []
`))
By("Verifying that denied labels were removed")
expectedLabels = map[string]k8sLabels{
targetNodeName: {
nfdv1alpha1.FeatureLabelNs + "/e2e-nodefeature-test-4": "obj-4",
"custom.vendor.io/e2e-nodefeature-test-3": "vendor-ns",
"random.denied.ns/e2e-nodefeature-test-1": "denied-ns",
"random.unwanted.ns/e2e-nodefeature-test-2": "unwanted-ns",
},
}
eventuallyNonControlPlaneNodes(ctx, f.ClientSet).Should(MatchLabels(expectedLabels, nodes))
})
})