mirror of
https://github.com/kubernetes-sigs/node-feature-discovery.git
synced 2025-03-31 04:04:51 +00:00
Merge b05a8443e7
into af374df837
This commit is contained in:
commit
000d5803b8
8 changed files with 59 additions and 1 deletions
|
@ -140,6 +140,8 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs)
|
|||
flagset.Var(overrides.ResyncPeriod, "resync-period", "Specify the NFD API controller resync period.")
|
||||
overrides.NfdApiParallelism = flagset.Int("nfd-api-parallelism", 10, "Defines the maximum number of goroutines responsible of updating nodes. "+
|
||||
"Can be used for the throttling mechanism.")
|
||||
overrides.InformerPageSize = flagset.Int64("informer-page-size", 200,
|
||||
"The list size to use when listing NodeFeature objects to sync informer cache.")
|
||||
|
||||
return args, overrides
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# extraLabelNs: ["added.ns.io","added.kubernets.io"]
|
||||
# denyLabelNs: ["denied.ns.io","denied.kubernetes.io"]
|
||||
# enableTaints: false
|
||||
# informerPageSize: 200
|
||||
# labelWhiteList: "foo"
|
||||
# resyncPeriod: "2h"
|
||||
# restrictions:
|
||||
|
|
|
@ -27,6 +27,7 @@ master:
|
|||
# extraLabelNs: ["added.ns.io","added.kubernets.io"]
|
||||
# denyLabelNs: ["denied.ns.io","denied.kubernetes.io"]
|
||||
# enableTaints: false
|
||||
# informerPageSize: 200
|
||||
# labelWhiteList: "foo"
|
||||
# resyncPeriod: "2h"
|
||||
# restrictions:
|
||||
|
|
|
@ -173,6 +173,21 @@ Example:
|
|||
nfd-master -deny-label-ns=*.vendor.com,vendor-2.io
|
||||
```
|
||||
|
||||
### -informer-page-size
|
||||
|
||||
The `-informer-page-size` flag is used to control pagination
|
||||
during informer cache sync on nfd-master startup.
|
||||
This is useful to control load on api-server/etcd as listing
|
||||
NodeFeature objects can be expensive, especially in large clusters.
|
||||
|
||||
Default: 200
|
||||
|
||||
Example:
|
||||
|
||||
```bash
|
||||
nfd-master -informer-page-size=20
|
||||
```
|
||||
|
||||
### -config
|
||||
|
||||
The `-config` flag specifies the path of the nfd-master configuration file to
|
||||
|
|
|
@ -216,6 +216,21 @@ Example:
|
|||
nfdApiParallelism: 1
|
||||
```
|
||||
|
||||
## informerPageSize
|
||||
|
||||
The `informerPageSize` option is used to control pagination
|
||||
during informer cache sync on nfd-master startup.
|
||||
This is useful to control load on api-server/etcd as listing
|
||||
NodeFeature objects can be expensive, especially in large clusters.
|
||||
|
||||
Default: 200
|
||||
|
||||
Example:
|
||||
|
||||
```yaml
|
||||
informerPageSize: 50
|
||||
```
|
||||
|
||||
## klog
|
||||
|
||||
The following options specify the logger configuration. Most of which can be
|
||||
|
|
|
@ -84,3 +84,11 @@ If you have RBAC authorization enabled (as is the default e.g. with clusters
|
|||
initialized with kubeadm) you need to configure the appropriate ClusterRoles,
|
||||
ClusterRoleBindings and a ServiceAccount for NFD to create node
|
||||
labels. The provided template will configure these for you.
|
||||
|
||||
## Informer List Pagination
|
||||
|
||||
When NFD Master starts up it starts an informer on the nodefeatures resources.
|
||||
These resources can be large and in a large cluster this initial list call
|
||||
to sync the informer cache can be expensive and heavy on api-server/etcd.
|
||||
You can use the `informer-list-size` argument to NFD master to
|
||||
control pagination size to help control the load during NFD-Master restart.
|
||||
|
|
|
@ -57,6 +57,7 @@ type nfdApiControllerOptions struct {
|
|||
ResyncPeriod time.Duration
|
||||
K8sClient k8sclient.Interface
|
||||
NodeFeatureNamespaceSelector *metav1.LabelSelector
|
||||
ListSize int64
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
@ -97,10 +98,17 @@ func newNfdController(config *restclient.Config, nfdApiControllerOptions nfdApiC
|
|||
// Tweak list opts on initial sync to avoid timeouts on the apiserver.
|
||||
// NodeFeature objects are huge and the Kubernetes apiserver
|
||||
// (v1.30) experiences http handler timeouts when the resource
|
||||
// version is set to some non-empty value (TODO: find out why).
|
||||
// version is set to some non-empty value
|
||||
// https://github.com/kubernetes/kubernetes/blob/ace55542575fb098b3e413692bbe2bc20d2348ba/staging/src/k8s.io/apiserver/pkg/storage/cacher/cacher.go#L600-L616 if you set resource version to 0
|
||||
// it serves the request from apiservers cache and doesn't use pagination otherwise pagination will default to 500
|
||||
// so that's why this is required on large clusters
|
||||
// So by setting this we're making it go to ETCD instead of from api-server cache, there's some WIP in k/k
|
||||
// that seems to imply they're working on improving this behavior where you'll be able to paginate from apiserver cache
|
||||
// it's not supported yet (2/2025), would be good to track this though kubernetes/kubernetes#108003
|
||||
if opts.ResourceVersion == "0" {
|
||||
opts.ResourceVersion = ""
|
||||
}
|
||||
opts.Limit = nfdApiControllerOptions.ListSize // value of 0 disables pagination
|
||||
}
|
||||
featureInformer := nfdinformersv1alpha1.New(informerFactory, "", tweakListOpts).NodeFeatures()
|
||||
if _, err := featureInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||
|
|
|
@ -92,6 +92,7 @@ type NFDConfig struct {
|
|||
NfdApiParallelism int
|
||||
Klog klogutils.KlogConfigOpts
|
||||
Restrictions Restrictions
|
||||
InformerPageSize int64
|
||||
}
|
||||
|
||||
// LeaderElectionConfig contains the configuration for leader election
|
||||
|
@ -110,6 +111,7 @@ type ConfigOverrideArgs struct {
|
|||
NoPublish *bool
|
||||
ResyncPeriod *utils.DurationVal
|
||||
NfdApiParallelism *int
|
||||
InformerPageSize *int64
|
||||
}
|
||||
|
||||
// Args holds command line arguments
|
||||
|
@ -122,6 +124,7 @@ type Args struct {
|
|||
Prune bool
|
||||
Options string
|
||||
EnableLeaderElection bool
|
||||
MetricsPort int
|
||||
|
||||
Overrides ConfigOverrideArgs
|
||||
}
|
||||
|
@ -242,6 +245,7 @@ func newDefaultConfig() *NFDConfig {
|
|||
NfdApiParallelism: 10,
|
||||
EnableTaints: false,
|
||||
ResyncPeriod: utils.DurationVal{Duration: time.Duration(1) * time.Hour},
|
||||
InformerPageSize: 200,
|
||||
LeaderElection: LeaderElectionConfig{
|
||||
LeaseDuration: utils.DurationVal{Duration: time.Duration(15) * time.Second},
|
||||
RetryPeriod: utils.DurationVal{Duration: time.Duration(2) * time.Second},
|
||||
|
@ -1190,6 +1194,9 @@ func (m *nfdMaster) configure(filepath string, overrides string) error {
|
|||
if m.args.Overrides.NfdApiParallelism != nil {
|
||||
c.NfdApiParallelism = *m.args.Overrides.NfdApiParallelism
|
||||
}
|
||||
if m.args.Overrides.InformerPageSize != nil {
|
||||
c.InformerPageSize = *m.args.Overrides.InformerPageSize
|
||||
}
|
||||
|
||||
if c.NfdApiParallelism <= 0 {
|
||||
return fmt.Errorf("the maximum number of concurrent labelers should be a non-zero positive number")
|
||||
|
@ -1295,6 +1302,7 @@ func (m *nfdMaster) startNfdApiController() error {
|
|||
ResyncPeriod: m.config.ResyncPeriod.Duration,
|
||||
K8sClient: m.k8sClient,
|
||||
NodeFeatureNamespaceSelector: m.config.Restrictions.NodeFeatureNamespaceSelector,
|
||||
ListSize: m.config.InformerPageSize,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to initialize CRD controller: %w", err)
|
||||
|
|
Loading…
Add table
Reference in a new issue