diff --git a/cmd/nfd-master/main.go b/cmd/nfd-master/main.go index d56032e37..bd7fcf51d 100644 --- a/cmd/nfd-master/main.go +++ b/cmd/nfd-master/main.go @@ -128,6 +128,8 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs) flagset.StringVar(&args.Options, "options", "", "Specify config options from command line. Config options are specified "+ "in the same format as in the config file (i.e. json or yaml). These options") + flagset.BoolVar(&args.EnableLeaderElection, "enable-leader-election", false, + "Enables a leader election. Enable this when running more than one replica on nfd master.") overrides := &master.ConfigOverrideArgs{ LabelWhiteList: &utils.RegexpVal{}, diff --git a/deployment/base/rbac/master-clusterrole.yaml b/deployment/base/rbac/master-clusterrole.yaml index 155c5f069..e61b4dc06 100644 --- a/deployment/base/rbac/master-clusterrole.yaml +++ b/deployment/base/rbac/master-clusterrole.yaml @@ -22,3 +22,18 @@ rules: - get - list - watch +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create +- apiGroups: + - coordination.k8s.io + resources: + - leases + resourceNames: + - "nfd-master.nfd.kubernetes.io" + verbs: + - get + - update diff --git a/deployment/components/master-config/nfd-master.conf.example b/deployment/components/master-config/nfd-master.conf.example index 3760e825d..6344ec51e 100644 --- a/deployment/components/master-config/nfd-master.conf.example +++ b/deployment/components/master-config/nfd-master.conf.example @@ -5,3 +5,9 @@ # enableTaints: false # labelWhiteList: "foo" # resyncPeriod: "2h" +# leaderElection: +# leaseDuration: 15s +# # this value has to be lower than leaseDuration and greater than retryPeriod*1.2 +# renewDeadline: 10s +# # this value has to be greater than 0 +# retryPeriod: 2s diff --git a/deployment/helm/node-feature-discovery/templates/clusterrole.yaml b/deployment/helm/node-feature-discovery/templates/clusterrole.yaml index 84b32644f..0cec23b66 100644 --- a/deployment/helm/node-feature-discovery/templates/clusterrole.yaml +++ b/deployment/helm/node-feature-discovery/templates/clusterrole.yaml @@ -25,6 +25,21 @@ rules: - get - list - watch +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create +- apiGroups: + - coordination.k8s.io + resources: + - leases + resourceNames: + - "nfd-master.nfd.kubernetes.io" + verbs: + - get + - update {{- end }} --- diff --git a/deployment/helm/node-feature-discovery/templates/master.yaml b/deployment/helm/node-feature-discovery/templates/master.yaml index 264e3bb74..9efb968ce 100644 --- a/deployment/helm/node-feature-discovery/templates/master.yaml +++ b/deployment/helm/node-feature-discovery/templates/master.yaml @@ -82,6 +82,9 @@ spec: - "-port={{ .Values.master.port | default "8080" }}" {{- if .Values.enableNodeFeatureApi }} - "-enable-nodefeature-api" + {{- if gt (int .Values.master.replicaCount) 1 }} + - "-enable-leader-election" + {{- end }} {{- end }} {{- if .Values.master.extraLabelNs | empty | not }} - "-extra-label-ns={{- join "," .Values.master.extraLabelNs }}" diff --git a/deployment/helm/node-feature-discovery/values.yaml b/deployment/helm/node-feature-discovery/values.yaml index f1524c0c6..67a6c342c 100644 --- a/deployment/helm/node-feature-discovery/values.yaml +++ b/deployment/helm/node-feature-discovery/values.yaml @@ -21,6 +21,12 @@ master: # enableTaints: false # labelWhiteList: "foo" # resyncPeriod: "2h" + # leaderElection: + # leaseDuration: 15s + # # this value has to be lower than leaseDuration and greater than retryPeriod*1.2 + # renewDeadline: 10s + # # this value has to be greater than 0 + # retryPeriod: 2s ### # The TCP port that nfd-master listens for incoming requests. Default: 8080 port: 8080 diff --git a/docs/reference/master-commandline-reference.md b/docs/reference/master-commandline-reference.md index 90ca236a7..27a3e5b93 100644 --- a/docs/reference/master-commandline-reference.md +++ b/docs/reference/master-commandline-reference.md @@ -151,6 +151,20 @@ Example: nfd-master -enable-nodefeature-api ``` +### -enable-leader-election + +The `-enable-leader-election` flag enables leader election for NFD-Master. +It is advised to turn on this flag when running more than one instance of +NFD-Master. + +This flag takes effect only when combined with `-enable-nodefeature-api` flag. + +Default: false + +```bash +nfd-master -enable-nodefeature-api -enable-leader-election +``` + ### -enable-taints The `-enable-taints` flag enables/disables node tainting feature of NFD. diff --git a/docs/reference/master-configuration-reference.md b/docs/reference/master-configuration-reference.md index 5168acefd..3dd5d92ce 100644 --- a/docs/reference/master-configuration-reference.md +++ b/docs/reference/master-configuration-reference.md @@ -114,7 +114,7 @@ Example: labelWhiteList: "foo" ``` -### resyncPeriod +## resyncPeriod The `resyncPeriod` option specifies the NFD API controller resync period. The resync means nfd-master replaying all NodeFeature and NodeFeatureRule objects, @@ -128,5 +128,64 @@ Default: 1 hour. Example: ```yaml -resyncPeriod=2h +resyncPeriod: 2h +``` + +## leaderElection + +The `leaderElection` section exposes configuration to tweak leader election. + +### leaderElection.leaseDuration + +`leaderElection.leaseDuration` is the duration that non-leader candidates will +wait to force acquire leadership. This is measured against time of +last observed ack. + +A client needs to wait a full LeaseDuration without observing a change to +the record before it can attempt to take over. When all clients are +shutdown and a new set of clients are started with different names against +the same leader record, they must wait the full LeaseDuration before +attempting to acquire the lease. Thus LeaseDuration should be as short as +possible (within your tolerance for clock skew rate) to avoid a possible +long waits in the scenario. + +Default: 15 seconds. + +Example: + +```yaml +leaderElection: + leaseDurtation: 15s +``` + +### leaderElection.renewDeadline + +`leaderElection.renewDeadline` is the duration that the acting master will retry +refreshing leadership before giving up. + +This value has to be lower than leaseDuration and greater than retryPeriod*1.2. + +Default: 10 seconds. + +Example: + +```yaml +leaderElection: + renewDeadline: 10s +``` + +### leaderElection.retryPeriod + +`leaderElection.retryPeriod` is the duration the LeaderElector clients should wait +between tries of actions. + +It has to be greater than 0. + +Default: 2 seconds. + +Example: + +```yaml +leaderElection: + retryPeriod: 2s ``` diff --git a/docs/usage/nfd-master.md b/docs/usage/nfd-master.md index 60d009e9a..23ab75712 100644 --- a/docs/usage/nfd-master.md +++ b/docs/usage/nfd-master.md @@ -29,6 +29,11 @@ are created on the node (note the allowed > its [`-enable-nodefeature-api`](../reference/worker-commandline-reference.md#-enable-nodefeature-api) > flag. +When `-enable-nodefeature-api` option is enabled and NFD-Master is intended to run +with more than one replica, it is advised to use `-enable-leader-election` flag. +This flag turns on leader election for NFD-Master and let only one replica +to act on changes in NodeFeature and NodeFeatureRule objects. + ## NodeFeatureRule controller NFD-Master acts as the controller for diff --git a/pkg/nfd-master/nfd-master-internal_test.go b/pkg/nfd-master/nfd-master-internal_test.go index 52dab52b8..ffcac269c 100644 --- a/pkg/nfd-master/nfd-master-internal_test.go +++ b/pkg/nfd-master/nfd-master-internal_test.go @@ -558,6 +558,10 @@ denyLabelNs: ["denied.ns.io","denied.kubernetes.io"] resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"] enableTaints: false labelWhiteList: "foo" +leaderElection: + leaseDuration: 20s + renewDeadline: 4s + retryPeriod: 30s `) f.Close() So(err, ShouldBeNil) @@ -573,6 +577,9 @@ labelWhiteList: "foo" So(master.config.ResourceLabels, ShouldResemble, utils.StringSetVal{"vendor-1.com/feature-1": struct{}{}, "vendor-2.io/feature-2": struct{}{}}) // from cmdline So(master.config.DenyLabelNs, ShouldResemble, utils.StringSetVal{"denied.ns.io": struct{}{}, "denied.kubernetes.io": struct{}{}}) So(master.config.LabelWhiteList.String(), ShouldEqual, "foo") + So(master.config.LeaderElection.LeaseDuration.Seconds(), ShouldEqual, float64(20)) + So(master.config.LeaderElection.RenewDeadline.Seconds(), ShouldEqual, float64(4)) + So(master.config.LeaderElection.RetryPeriod.Seconds(), ShouldEqual, float64(30)) }) }) diff --git a/pkg/nfd-master/nfd-master.go b/pkg/nfd-master/nfd-master.go index 3d2795985..5eda86559 100644 --- a/pkg/nfd-master/nfd-master.go +++ b/pkg/nfd-master/nfd-master.go @@ -30,6 +30,7 @@ import ( "strings" "time" + "github.com/google/uuid" "golang.org/x/net/context" "google.golang.org/grpc" "google.golang.org/grpc/credentials" @@ -38,9 +39,12 @@ import ( "google.golang.org/grpc/peer" corev1 "k8s.io/api/core/v1" k8sQuantity "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8sLabels "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/kubernetes" restclient "k8s.io/client-go/rest" + "k8s.io/client-go/tools/leaderelection" + "k8s.io/client-go/tools/leaderelection/resourcelock" "k8s.io/klog/v2" controller "k8s.io/kubernetes/pkg/controller" taintutils "k8s.io/kubernetes/pkg/util/taints" @@ -71,6 +75,14 @@ type NFDConfig struct { ResourceLabels utils.StringSetVal EnableTaints bool ResyncPeriod utils.DurationVal + LeaderElection LeaderElectionConfig +} + +// LeaderElectionConfig contains the configuration for leader election +type LeaderElectionConfig struct { + LeaseDuration utils.DurationVal + RenewDeadline utils.DurationVal + RetryPeriod utils.DurationVal } // ConfigOverrideArgs are args that override config file options @@ -98,6 +110,7 @@ type Args struct { Prune bool VerifyNodeName bool Options string + EnableLeaderElection bool Overrides ConfigOverrideArgs } @@ -174,6 +187,11 @@ func newDefaultConfig() *NFDConfig { ResourceLabels: utils.StringSetVal{}, EnableTaints: false, ResyncPeriod: utils.DurationVal{Duration: time.Duration(1) * time.Hour}, + LeaderElection: LeaderElectionConfig{ + LeaseDuration: utils.DurationVal{Duration: time.Duration(15) * time.Second}, + RetryPeriod: utils.DurationVal{Duration: time.Duration(2) * time.Second}, + RenewDeadline: utils.DurationVal{Duration: time.Duration(10) * time.Second}, + }, } } @@ -221,7 +239,11 @@ func (m *nfdMaster) Run() error { // Run updater that handles events from the nfd CRD API. if m.nfdController != nil { - go m.nfdAPIUpdateHandler() + if m.args.EnableLeaderElection { + go m.nfdAPIUpdateHandlerWithLeaderElection() + } else { + go m.nfdAPIUpdateHandler() + } } // Notify that we're ready to accept connections @@ -1230,3 +1252,47 @@ func (m *nfdMaster) startNfdApiController() error { } return nil } + +func (m *nfdMaster) nfdAPIUpdateHandlerWithLeaderElection() { + ctx := context.Background() + client, err := m.apihelper.GetClient() + if err != nil { + klog.ErrorS(err, "failed to get Kubernetes client") + m.Stop() + } + lock := &resourcelock.LeaseLock{ + LeaseMeta: metav1.ObjectMeta{ + Name: "nfd-master.nfd.kubernetes.io", + Namespace: m.namespace, + }, + Client: client.CoordinationV1(), + LockConfig: resourcelock.ResourceLockConfig{ + // add uuid to prevent situation where 2 nfd-master nodes run on same node + Identity: m.nodeName + "_" + uuid.NewString(), + }, + } + config := leaderelection.LeaderElectionConfig{ + Lock: lock, + // make it configurable? + LeaseDuration: m.config.LeaderElection.LeaseDuration.Duration, + RetryPeriod: m.config.LeaderElection.RetryPeriod.Duration, + RenewDeadline: m.config.LeaderElection.RenewDeadline.Duration, + Callbacks: leaderelection.LeaderCallbacks{ + OnStartedLeading: func(_ context.Context) { + m.nfdAPIUpdateHandler() + }, + OnStoppedLeading: func() { + // We lost the lock. + klog.ErrorS(err, "leaderelection lock was lost") + m.Stop() + }, + }, + } + leaderElector, err := leaderelection.NewLeaderElector(config) + if err != nil { + klog.ErrorS(err, "couldn't create leader elector") + m.Stop() + } + + leaderElector.Run(ctx) +}