1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2024-12-14 11:57:51 +00:00

feat: add master resync period configurability

This PR adds a config option for setting the NFD API controller resync period.
The resync period is only activated when the NodeFeature API has been
enabled (with -enable-nodefeature-api).

Signed-off-by: AhmedGrati <ahmedgrati1999@gmail.com>
This commit is contained in:
AhmedGrati 2023-04-15 16:11:59 +01:00
parent 018cd33306
commit 7917434d38
12 changed files with 118 additions and 37 deletions

View file

@ -20,6 +20,7 @@ import (
"flag"
"fmt"
"os"
"time"
"k8s.io/klog/v2"
@ -67,6 +68,8 @@ func main() {
args.Overrides.EnableTaints = overrides.EnableTaints
case "no-publish":
args.Overrides.NoPublish = overrides.NoPublish
case "-resync-period":
args.Overrides.ResyncPeriod = overrides.ResyncPeriod
}
})
@ -131,6 +134,7 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs)
DenyLabelNs: &utils.StringSetVal{},
ExtraLabelNs: &utils.StringSetVal{},
ResourceLabels: &utils.StringSetVal{},
ResyncPeriod: &utils.DurationVal{Duration: time.Duration(1) * time.Hour},
}
flagset.Var(overrides.ExtraLabelNs, "extra-label-ns",
"Comma separated list of allowed extra label namespaces")
@ -145,6 +149,8 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs)
"Comma separated list of denied label namespaces")
flagset.Var(overrides.ResourceLabels, "resource-labels",
"Comma separated list of labels to be exposed as extended resources. DEPRECATED: use NodeFeatureRule objects instead")
flagset.Var(overrides.ResyncPeriod, "resync-period",
"Specify the NFD API controller resync period."+
"It has an effect when the NodeFeature API has been enabled (with -enable-nodefeature-api).")
return args, overrides
}

View file

@ -4,3 +4,4 @@
# resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"]
# enableTaints: false
# labelWhiteList: "foo"
# resyncPeriod: "2h"

View file

@ -104,6 +104,9 @@ spec:
{{- if .Values.master.featureRulesController | kindIs "invalid" | not }}
- "-featurerules-controller={{ .Values.master.featureRulesController }}"
{{- end }}
{{- if .Values.master.resyncPeriod }}
- "-resync-period={{ .Values.master.resyncPeriod }}"
{{- end }}
{{- if .Values.tls.enable }}
- "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
- "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"

View file

@ -20,11 +20,13 @@ master:
# resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"]
# enableTaints: false
# labelWhiteList: "foo"
# resyncPeriod: "2h"
### <NFD-MASTER-CONF-END-DO-NOT-REMOVE>
# The TCP port that nfd-master listens for incoming requests. Default: 8080
port: 8080
instance:
featureApi:
resyncPeriod:
denyLabelNs: []
extraLabelNs: []
resourceLabels: []

View file

@ -110,6 +110,7 @@ We have introduced the following Chart parameters.
| `master.*` | dict | | NFD master deployment configuration |
| `master.port` | integer | | Specifies the TCP port that nfd-master listens for incoming requests. |
| `master.instance` | string | | Instance name. Used to separate annotation namespaces for multiple parallel deployments |
| `master.resyncPeriod` | string | | NFD API controller resync period. |
| `master.extraLabelNs` | array | [] | List of allowed extra label namespaces |
| `master.resourceLabels` | array | [] | List of labels to be registered as extended resources |
| `master.enableTaints` | bool | false | Specifies whether to enable or disable node tainting |

View file

@ -376,3 +376,20 @@ Default: 0
Comma-separated list of `pattern=N` settings for file-filtered logging.
Default: *empty*
### -resync-period
The `-resync-period` flag specifies the NFD API controller resync period.
The resync means nfd-master replaying all NodeFeature and NodeFeatureRule objects,
thus effectively re-syncing all nodes in the cluster (i.e. ensuring labels, annotations,
extended resources and taints are in place).
Only has effect when the [NodeFeature](../usage/custom-resources.md#nodefeature)
CRD API has been enabled with [`-enable-nodefeature-api`](master-commandline-reference.md#-enable-nodefeature-api).
Default: 1 hour.
Example:
```bash
nfd-master -resync-period=2h
```

View file

@ -113,3 +113,20 @@ Example:
```yaml
labelWhiteList: "foo"
```
### resyncPeriod
The `resyncPeriod` option specifies the NFD API controller resync period.
The resync means nfd-master replaying all NodeFeature and NodeFeatureRule objects,
thus effectively re-syncing all nodes in the cluster (i.e. ensuring labels, annotations,
extended resources and taints are in place).
Only has effect when the [NodeFeature](../usage/custom-resources.md#nodefeature)
CRD API has been enabled with [`-enable-nodefeature-api`](master-commandline-reference.md#-enable-nodefeature-api).
Default: 1 hour.
Example:
```yaml
resyncPeriod=2h
```

View file

@ -41,7 +41,12 @@ type nfdController struct {
updateOneNodeChan chan string
}
func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdController, error) {
type nfdApiControllerOptions struct {
disableNodeFeature bool
resyncPeriod time.Duration
}
func newNfdController(config *restclient.Config, nfdApiControllerOptions nfdApiControllerOptions) (*nfdController, error) {
c := &nfdController{
stopChan: make(chan struct{}, 1),
updateAllNodesChan: make(chan struct{}, 1),
@ -49,11 +54,10 @@ func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdC
}
nfdClient := nfdclientset.NewForConfigOrDie(config)
informerFactory := nfdinformers.NewSharedInformerFactory(nfdClient, 1*time.Hour)
informerFactory := nfdinformers.NewSharedInformerFactory(nfdClient, nfdApiControllerOptions.resyncPeriod)
// Add informer for NodeFeature objects
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
featureInformer := informerFactory.Nfd().V1alpha1().NodeFeatures()
if _, err := featureInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
@ -83,7 +87,7 @@ func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdC
AddFunc: func(object interface{}) {
key, _ := cache.MetaNamespaceKeyFunc(object)
klog.V(2).Infof("NodeFeatureRule %v added", key)
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
c.updateAllNodes()
}
// else: rules will be processed only when gRPC requests are received
@ -91,7 +95,7 @@ func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdC
UpdateFunc: func(oldObject, newObject interface{}) {
key, _ := cache.MetaNamespaceKeyFunc(newObject)
klog.V(2).Infof("NodeFeatureRule %v updated", key)
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
c.updateAllNodes()
}
// else: rules will be processed only when gRPC requests are received
@ -99,7 +103,7 @@ func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdC
DeleteFunc: func(object interface{}) {
key, _ := cache.MetaNamespaceKeyFunc(object)
klog.V(2).Infof("NodeFeatureRule %v deleted", key)
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
c.updateAllNodes()
}
// else: rules will be processed only when gRPC requests are received

View file

@ -640,24 +640,32 @@ extraLabelNs: ["added.ns.io"]
// Update config and verify the effect
writeConfig(`
extraLabelNs: ["override.ns.io"]
resyncPeriod: '2h'
`)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"override.ns.io": struct{}{}})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(2)*time.Hour)
// Removing config file should get back our defaults
err = os.RemoveAll(tmpDir)
So(err, ShouldBeNil)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(1)*time.Hour)
// Re-creating config dir and file should change the config
err = os.MkdirAll(configDir, 0755)
So(err, ShouldBeNil)
writeConfig(`
extraLabelNs: ["another.override.ns"]
resyncPeriod: '3m'
`)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"another.override.ns": struct{}{}})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(3)*time.Minute)
})
})
}

View file

@ -70,6 +70,7 @@ type NFDConfig struct {
NoPublish bool
ResourceLabels utils.StringSetVal
EnableTaints bool
ResyncPeriod utils.DurationVal
}
// ConfigOverrideArgs are args that override config file options
@ -80,6 +81,7 @@ type ConfigOverrideArgs struct {
ResourceLabels *utils.StringSetVal
EnableTaints *bool
NoPublish *bool
ResyncPeriod *utils.DurationVal
}
// Args holds command line arguments
@ -96,6 +98,7 @@ type Args struct {
Prune bool
VerifyNodeName bool
Options string
ResyncPeriod utils.DurationVal
Overrides ConfigOverrideArgs
}
@ -172,6 +175,7 @@ func newDefaultConfig() *NFDConfig {
NoPublish: false,
ResourceLabels: utils.StringSetVal{},
EnableTaints: false,
ResyncPeriod: utils.DurationVal{Duration: time.Duration(1) * time.Hour},
}
}
@ -195,7 +199,10 @@ func (m *nfdMaster) Run() error {
return err
}
klog.Info("starting nfd api controller")
m.nfdController, err = newNfdController(kubeconfig, !m.args.EnableNodeFeatureApi)
m.nfdController, err = newNfdController(kubeconfig, nfdApiControllerOptions{
disableNodeFeature: !m.args.EnableNodeFeatureApi,
resyncPeriod: m.args.ResyncPeriod.Duration,
})
if err != nil {
return fmt.Errorf("failed to initialize CRD controller: %w", err)
}
@ -1112,6 +1119,9 @@ func (m *nfdMaster) configure(filepath string, overrides string) error {
if m.args.Overrides.LabelWhiteList != nil {
c.LabelWhiteList = *m.args.Overrides.LabelWhiteList
}
if m.args.Overrides.ResyncPeriod != nil {
c.ResyncPeriod = *m.args.Overrides.ResyncPeriod
}
m.config = c
if !c.NoPublish {

View file

@ -80,7 +80,7 @@ type coreConfig struct {
FeatureSources []string
Sources *[]string
LabelSources []string
SleepInterval duration
SleepInterval utils.DurationVal
}
type sourcesConfig map[string]source.Config
@ -127,10 +127,6 @@ type nfdWorker struct {
labelSources []source.LabelSource
}
type duration struct {
time.Duration
}
// This ticker can represent infinite and normal intervals.
type infiniteTicker struct {
*time.Ticker
@ -169,7 +165,7 @@ func newDefaultConfig() *NFDConfig {
return &NFDConfig{
Core: coreConfig{
LabelWhiteList: utils.RegexpVal{Regexp: *regexp.MustCompile("")},
SleepInterval: duration{60 * time.Second},
SleepInterval: utils.DurationVal{Duration: 60 * time.Second},
FeatureSources: []string{"all"},
LabelSources: []string{"all"},
Klog: make(map[string]string),
@ -369,7 +365,7 @@ func (c *coreConfig) sanitize() {
if c.SleepInterval.Duration > 0 && c.SleepInterval.Duration < time.Second {
klog.Warningf("too short sleep interval specified (%s), forcing to 1s",
c.SleepInterval.Duration.String())
c.SleepInterval = duration{time.Second}
c.SleepInterval = utils.DurationVal{Duration: time.Second}
}
}
@ -756,27 +752,6 @@ func (m *nfdWorker) getNfdClient() (*nfdclient.Clientset, error) {
return c, nil
}
// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (d *duration) UnmarshalJSON(data []byte) error {
var v interface{}
if err := json.Unmarshal(data, &v); err != nil {
return err
}
switch val := v.(type) {
case float64:
d.Duration = time.Duration(val)
case string:
var err error
d.Duration, err = time.ParseDuration(val)
if err != nil {
return err
}
default:
return fmt.Errorf("invalid duration %s", data)
}
return nil
}
// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (c *sourcesConfig) UnmarshalJSON(data []byte) error {
// First do a raw parse to get the per-source data

View file

@ -23,6 +23,7 @@ import (
"regexp"
"sort"
"strings"
"time"
)
// RegexpVal is a wrapper for regexp command line flags
@ -173,3 +174,39 @@ func NewKlogFlagVal(f *flag.Flag) *KlogFlagVal {
type boolFlag interface {
IsBoolFlag() bool
}
// DurationVal is a wrapper for handling time.Duration as a command line flag
type DurationVal struct {
time.Duration
}
// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (d *DurationVal) UnmarshalJSON(data []byte) error {
var v interface{}
if err := json.Unmarshal(data, &v); err != nil {
return err
}
switch val := v.(type) {
case float64:
d.Duration = time.Duration(val)
case string:
var err error
d.Duration, err = time.ParseDuration(val)
if err != nil {
return err
}
default:
return fmt.Errorf("invalid duration %s", data)
}
return nil
}
// Set implements the flag.Value interface
func (d *DurationVal) Set(val string) error {
m, err := time.ParseDuration(val)
if err != nil {
return err
}
*d = DurationVal{m}
return nil
}