1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2025-03-31 04:04:51 +00:00
This commit is contained in:
Igor Velichkovich 2025-03-27 17:21:30 +02:00 committed by GitHub
commit 000d5803b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 59 additions and 1 deletions

View file

@ -140,6 +140,8 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs)
flagset.Var(overrides.ResyncPeriod, "resync-period", "Specify the NFD API controller resync period.")
overrides.NfdApiParallelism = flagset.Int("nfd-api-parallelism", 10, "Defines the maximum number of goroutines responsible of updating nodes. "+
"Can be used for the throttling mechanism.")
overrides.InformerPageSize = flagset.Int64("informer-page-size", 200,
"The list size to use when listing NodeFeature objects to sync informer cache.")
return args, overrides
}

View file

@ -3,6 +3,7 @@
# extraLabelNs: ["added.ns.io","added.kubernets.io"]
# denyLabelNs: ["denied.ns.io","denied.kubernetes.io"]
# enableTaints: false
# informerPageSize: 200
# labelWhiteList: "foo"
# resyncPeriod: "2h"
# restrictions:

View file

@ -27,6 +27,7 @@ master:
# extraLabelNs: ["added.ns.io","added.kubernets.io"]
# denyLabelNs: ["denied.ns.io","denied.kubernetes.io"]
# enableTaints: false
# informerPageSize: 200
# labelWhiteList: "foo"
# resyncPeriod: "2h"
# restrictions:

View file

@ -173,6 +173,21 @@ Example:
nfd-master -deny-label-ns=*.vendor.com,vendor-2.io
```
### -informer-page-size
The `-informer-page-size` flag is used to control pagination
during informer cache sync on nfd-master startup.
This is useful to control load on api-server/etcd as listing
NodeFeature objects can be expensive, especially in large clusters.
Default: 200
Example:
```bash
nfd-master -informer-page-size=20
```
### -config
The `-config` flag specifies the path of the nfd-master configuration file to

View file

@ -216,6 +216,21 @@ Example:
nfdApiParallelism: 1
```
## informerPageSize
The `informerPageSize` option is used to control pagination
during informer cache sync on nfd-master startup.
This is useful to control load on api-server/etcd as listing
NodeFeature objects can be expensive, especially in large clusters.
Default: 200
Example:
```yaml
informerPageSize: 50
```
## klog
The following options specify the logger configuration. Most of which can be

View file

@ -84,3 +84,11 @@ If you have RBAC authorization enabled (as is the default e.g. with clusters
initialized with kubeadm) you need to configure the appropriate ClusterRoles,
ClusterRoleBindings and a ServiceAccount for NFD to create node
labels. The provided template will configure these for you.
## Informer List Pagination
When NFD Master starts up it starts an informer on the nodefeatures resources.
These resources can be large and in a large cluster this initial list call
to sync the informer cache can be expensive and heavy on api-server/etcd.
You can use the `informer-list-size` argument to NFD master to
control pagination size to help control the load during NFD-Master restart.

View file

@ -57,6 +57,7 @@ type nfdApiControllerOptions struct {
ResyncPeriod time.Duration
K8sClient k8sclient.Interface
NodeFeatureNamespaceSelector *metav1.LabelSelector
ListSize int64
}
func init() {
@ -97,10 +98,17 @@ func newNfdController(config *restclient.Config, nfdApiControllerOptions nfdApiC
// Tweak list opts on initial sync to avoid timeouts on the apiserver.
// NodeFeature objects are huge and the Kubernetes apiserver
// (v1.30) experiences http handler timeouts when the resource
// version is set to some non-empty value (TODO: find out why).
// version is set to some non-empty value
// https://github.com/kubernetes/kubernetes/blob/ace55542575fb098b3e413692bbe2bc20d2348ba/staging/src/k8s.io/apiserver/pkg/storage/cacher/cacher.go#L600-L616 if you set resource version to 0
// it serves the request from apiservers cache and doesn't use pagination otherwise pagination will default to 500
// so that's why this is required on large clusters
// So by setting this we're making it go to ETCD instead of from api-server cache, there's some WIP in k/k
// that seems to imply they're working on improving this behavior where you'll be able to paginate from apiserver cache
// it's not supported yet (2/2025), would be good to track this though kubernetes/kubernetes#108003
if opts.ResourceVersion == "0" {
opts.ResourceVersion = ""
}
opts.Limit = nfdApiControllerOptions.ListSize // value of 0 disables pagination
}
featureInformer := nfdinformersv1alpha1.New(informerFactory, "", tweakListOpts).NodeFeatures()
if _, err := featureInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{

View file

@ -92,6 +92,7 @@ type NFDConfig struct {
NfdApiParallelism int
Klog klogutils.KlogConfigOpts
Restrictions Restrictions
InformerPageSize int64
}
// LeaderElectionConfig contains the configuration for leader election
@ -110,6 +111,7 @@ type ConfigOverrideArgs struct {
NoPublish *bool
ResyncPeriod *utils.DurationVal
NfdApiParallelism *int
InformerPageSize *int64
}
// Args holds command line arguments
@ -122,6 +124,7 @@ type Args struct {
Prune bool
Options string
EnableLeaderElection bool
MetricsPort int
Overrides ConfigOverrideArgs
}
@ -242,6 +245,7 @@ func newDefaultConfig() *NFDConfig {
NfdApiParallelism: 10,
EnableTaints: false,
ResyncPeriod: utils.DurationVal{Duration: time.Duration(1) * time.Hour},
InformerPageSize: 200,
LeaderElection: LeaderElectionConfig{
LeaseDuration: utils.DurationVal{Duration: time.Duration(15) * time.Second},
RetryPeriod: utils.DurationVal{Duration: time.Duration(2) * time.Second},
@ -1190,6 +1194,9 @@ func (m *nfdMaster) configure(filepath string, overrides string) error {
if m.args.Overrides.NfdApiParallelism != nil {
c.NfdApiParallelism = *m.args.Overrides.NfdApiParallelism
}
if m.args.Overrides.InformerPageSize != nil {
c.InformerPageSize = *m.args.Overrides.InformerPageSize
}
if c.NfdApiParallelism <= 0 {
return fmt.Errorf("the maximum number of concurrent labelers should be a non-zero positive number")
@ -1295,6 +1302,7 @@ func (m *nfdMaster) startNfdApiController() error {
ResyncPeriod: m.config.ResyncPeriod.Duration,
K8sClient: m.k8sClient,
NodeFeatureNamespaceSelector: m.config.Restrictions.NodeFeatureNamespaceSelector,
ListSize: m.config.InformerPageSize,
})
if err != nil {
return fmt.Errorf("failed to initialize CRD controller: %w", err)