1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2024-12-15 17:50:49 +00:00
node-feature-discovery/pkg/nfd-master/nfd-api-controller.go
Markus Lehtonen 2bb8a72532 nfd-master: proper shutdown of nfd api informers
Stop blocking on event channels when the api controller is stopped.
Ensures that the nfd API informer factory is properly shut down and all
resources released when stop() is called. This eliminates a memory leak
on re-configure events when leader election is enabled.
2024-08-20 12:44:08 +03:00

234 lines
7.9 KiB
Go

/*
Copyright 2021-2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nfdmaster
import (
"fmt"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
nfdclientset "sigs.k8s.io/node-feature-discovery/api/generated/clientset/versioned"
nfdscheme "sigs.k8s.io/node-feature-discovery/api/generated/clientset/versioned/scheme"
nfdinformers "sigs.k8s.io/node-feature-discovery/api/generated/informers/externalversions"
nfdinformersv1alpha1 "sigs.k8s.io/node-feature-discovery/api/generated/informers/externalversions/nfd/v1alpha1"
nfdlisters "sigs.k8s.io/node-feature-discovery/api/generated/listers/nfd/v1alpha1"
nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/api/nfd/v1alpha1"
"sigs.k8s.io/node-feature-discovery/pkg/utils"
)
// nfdController bundles the listers for the NFD API objects together with
// the channels through which informer events are turned into update requests.
type nfdController struct {
// Listers backed by the informer caches. featureLister and
// featureGroupLister are only set by newNfdController when the respective
// informer is enabled; ruleLister is always set.
featureLister nfdlisters.NodeFeatureLister
ruleLister nfdlisters.NodeFeatureRuleLister
featureGroupLister nfdlisters.NodeFeatureGroupLister
// stopChan is handed to the informer factory as its stop channel and is
// closed by stop(). The blocking senders below also select on it.
stopChan chan struct{}
// updateAllNodesChan is signalled (non-blocking send) by updateAllNodes.
updateAllNodesChan chan struct{}
// updateOneNodeChan carries node names sent by updateOneNode.
updateOneNodeChan chan string
// updateAllNodeFeatureGroupsChan is signalled (non-blocking send) by
// updateAllNodeFeatureGroups.
updateAllNodeFeatureGroupsChan chan struct{}
// updateNodeFeatureGroupChan carries NodeFeatureGroup names sent by
// updateNodeFeatureGroup.
updateNodeFeatureGroupChan chan string
}
// nfdApiControllerOptions holds the settings consumed by newNfdController.
type nfdApiControllerOptions struct {
// DisableNodeFeature skips creation of the NodeFeature informer. When set,
// NodeFeatureRule events do not trigger updateAllNodes either.
DisableNodeFeature bool
// DisableNodeFeatureGroup skips creation of the NodeFeatureGroup informer.
// When set, NodeFeature events do not trigger updateAllNodeFeatureGroups.
DisableNodeFeatureGroup bool
// ResyncPeriod is passed to the shared informer factory on creation.
ResyncPeriod time.Duration
}
func init() {
utilruntime.Must(nfdv1alpha1.AddToScheme(nfdscheme.Scheme))
}
// newNfdController creates the informers for the NFD API objects, registers
// event handlers that translate object events into update requests on the
// controller's channels, starts the informers and waits for their caches to
// sync.
//
// The NodeFeature and NodeFeatureGroup informers are only created when they
// are not disabled in nfdApiControllerOptions; the corresponding listers are
// left nil otherwise. The NodeFeatureRule informer is always created.
//
// An error is returned if registering an event handler fails or if an
// informer cache fails to sync. The clientset is created with
// NewForConfigOrDie, which is expected to panic instead of returning an error
// on an unusable config.
func newNfdController(config *restclient.Config, nfdApiControllerOptions nfdApiControllerOptions) (*nfdController, error) {
	c := &nfdController{
		stopChan: make(chan struct{}),
		// The "update all" channels are written with non-blocking sends (see
		// updateAllNodes and updateAllNodeFeatureGroups). A buffer of one
		// latches a pending request so that it is not lost when the consumer
		// is busy, while further requests coalesce into the pending one.
		updateAllNodesChan:             make(chan struct{}, 1),
		updateOneNodeChan:              make(chan string),
		updateAllNodeFeatureGroupsChan: make(chan struct{}, 1),
		updateNodeFeatureGroupChan:     make(chan string),
	}

	// unwrapTombstone returns the last known object state when the informer
	// delivers a cache.DeletedFinalStateUnknown tombstone instead of the
	// deleted object itself, and obj unchanged otherwise.
	unwrapTombstone := func(obj interface{}) interface{} {
		if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok {
			return tombstone.Obj
		}
		return obj
	}

	nfdClient := nfdclientset.NewForConfigOrDie(config)
	klog.V(2).InfoS("initializing new NFD API controller", "options", utils.DelayedDumper(nfdApiControllerOptions))

	informerFactory := nfdinformers.NewSharedInformerFactory(nfdClient, nfdApiControllerOptions.ResyncPeriod)

	// Add informer for NodeFeature objects
	if !nfdApiControllerOptions.DisableNodeFeature {
		tweakListOpts := func(opts *metav1.ListOptions) {
			// Tweak list opts on initial sync to avoid timeouts on the apiserver.
			// NodeFeature objects are huge and the Kubernetes apiserver
			// (v1.30) experiences http handler timeouts when the resource
			// version is set to some non-empty value (TODO: find out why).
			if opts.ResourceVersion == "0" {
				opts.ResourceVersion = ""
			}
		}
		featureInformer := nfdinformersv1alpha1.New(informerFactory, "", tweakListOpts).NodeFeatures()
		if _, err := featureInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				nfr := obj.(*nfdv1alpha1.NodeFeature)
				klog.V(2).InfoS("NodeFeature added", "nodefeature", klog.KObj(nfr))
				c.updateOneNode("NodeFeature", nfr)
				if !nfdApiControllerOptions.DisableNodeFeatureGroup {
					c.updateAllNodeFeatureGroups()
				}
			},
			UpdateFunc: func(oldObj, newObj interface{}) {
				nfr := newObj.(*nfdv1alpha1.NodeFeature)
				klog.V(2).InfoS("NodeFeature updated", "nodefeature", klog.KObj(nfr))
				c.updateOneNode("NodeFeature", nfr)
				if !nfdApiControllerOptions.DisableNodeFeatureGroup {
					c.updateAllNodeFeatureGroups()
				}
			},
			DeleteFunc: func(obj interface{}) {
				// Delete events may carry a tombstone; never assert blindly.
				nfr, ok := unwrapTombstone(obj).(*nfdv1alpha1.NodeFeature)
				if !ok {
					klog.ErrorS(nil, "unexpected object type in NodeFeature delete event", "type", fmt.Sprintf("%T", obj))
					return
				}
				klog.V(2).InfoS("NodeFeature deleted", "nodefeature", klog.KObj(nfr))
				c.updateOneNode("NodeFeature", nfr)
				if !nfdApiControllerOptions.DisableNodeFeatureGroup {
					c.updateAllNodeFeatureGroups()
				}
			},
		}); err != nil {
			return nil, err
		}
		c.featureLister = featureInformer.Lister()
	}

	// Add informer for NodeFeatureRule objects
	nodeFeatureRuleInformer := informerFactory.Nfd().V1alpha1().NodeFeatureRules()
	if _, err := nodeFeatureRuleInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(object interface{}) {
			klog.V(2).InfoS("NodeFeatureRule added", "nodefeaturerule", klog.KObj(object.(metav1.Object)))
			if !nfdApiControllerOptions.DisableNodeFeature {
				c.updateAllNodes()
			}
			// else: rules will be processed only when gRPC requests are received
		},
		UpdateFunc: func(oldObject, newObject interface{}) {
			klog.V(2).InfoS("NodeFeatureRule updated", "nodefeaturerule", klog.KObj(newObject.(metav1.Object)))
			if !nfdApiControllerOptions.DisableNodeFeature {
				c.updateAllNodes()
			}
			// else: rules will be processed only when gRPC requests are received
		},
		DeleteFunc: func(object interface{}) {
			// Delete events may carry a tombstone; only the log line needs
			// the object, so an unexpected type must not abort the update.
			if rule, ok := unwrapTombstone(object).(metav1.Object); ok {
				klog.V(2).InfoS("NodeFeatureRule deleted", "nodefeaturerule", klog.KObj(rule))
			} else {
				klog.ErrorS(nil, "unexpected object type in NodeFeatureRule delete event", "type", fmt.Sprintf("%T", object))
			}
			if !nfdApiControllerOptions.DisableNodeFeature {
				c.updateAllNodes()
			}
			// else: rules will be processed only when gRPC requests are received
		},
	}); err != nil {
		return nil, err
	}
	c.ruleLister = nodeFeatureRuleInformer.Lister()

	// Add informer for NodeFeatureGroup objects
	if !nfdApiControllerOptions.DisableNodeFeatureGroup {
		nodeFeatureGroupInformer := informerFactory.Nfd().V1alpha1().NodeFeatureGroups()
		if _, err := nodeFeatureGroupInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				nfg := obj.(*nfdv1alpha1.NodeFeatureGroup)
				klog.V(2).InfoS("NodeFeatureGroup added", "nodeFeatureGroup", klog.KObj(nfg))
				c.updateNodeFeatureGroup(nfg.Name)
			},
			UpdateFunc: func(oldObj, newObj interface{}) {
				nfg := newObj.(*nfdv1alpha1.NodeFeatureGroup)
				klog.V(2).InfoS("NodeFeatureGroup updated", "nodeFeatureGroup", klog.KObj(nfg))
				c.updateNodeFeatureGroup(nfg.Name)
			},
			DeleteFunc: func(obj interface{}) {
				// Delete events may carry a tombstone; never assert blindly.
				nfg, ok := unwrapTombstone(obj).(*nfdv1alpha1.NodeFeatureGroup)
				if !ok {
					klog.ErrorS(nil, "unexpected object type in NodeFeatureGroup delete event", "type", fmt.Sprintf("%T", obj))
					return
				}
				klog.V(2).InfoS("NodeFeatureGroup deleted", "nodeFeatureGroup", klog.KObj(nfg))
				c.updateNodeFeatureGroup(nfg.Name)
			},
		}); err != nil {
			return nil, err
		}
		c.featureGroupLister = nodeFeatureGroupInformer.Lister()
	}

	// Start informers
	informerFactory.Start(c.stopChan)

	// Wait for all started informers to sync, timing it for the log line.
	now := time.Now()
	ret := informerFactory.WaitForCacheSync(c.stopChan)
	for res, ok := range ret {
		if !ok {
			return nil, fmt.Errorf("informer cache failed to sync resource %s", res)
		}
	}

	klog.InfoS("informer caches synced", "duration", time.Since(now))

	return c, nil
}
// stop closes the controller's stop channel. That channel is the one given
// to the informer factory in newNfdController and the one the blocking
// senders (updateOneNode, updateNodeFeatureGroup) select on, so closing it
// releases them. Calling stop a second time panics, as closing an already
// closed channel is a run-time panic in Go.
func (c *nfdController) stop() {
	stopCh := c.stopChan
	close(stopCh)
}
// getNodeNameForObj reads the node name from the
// nfdv1alpha1.NodeFeatureObjNodeNameLabel label of obj.
//
// It returns an error when the label is absent and a different error when the
// label is present but has an empty value.
func getNodeNameForObj(obj metav1.Object) (string, error) {
	labels := obj.GetLabels()
	name, found := labels[nfdv1alpha1.NodeFeatureObjNodeNameLabel]

	switch {
	case !found:
		return "", fmt.Errorf("%q label is missing", nfdv1alpha1.NodeFeatureObjNodeNameLabel)
	case name == "":
		return "", fmt.Errorf("%q label is empty", nfdv1alpha1.NodeFeatureObjNodeNameLabel)
	default:
		return name, nil
	}
}
// updateOneNode requests an update of the node that obj belongs to. The node
// name is resolved with getNodeNameForObj; if that fails the error is logged
// (with typ and the object reference) and no request is made.
//
// The send blocks until the name has been handed over on updateOneNodeChan or
// the stop channel becomes readable, whichever happens first.
func (c *nfdController) updateOneNode(typ string, obj metav1.Object) {
	name, err := getNodeNameForObj(obj)
	if err == nil {
		select {
		case <-c.stopChan:
			// Controller stopped; give up on the request.
		case c.updateOneNodeChan <- name:
		}
		return
	}
	klog.ErrorS(err, "failed to determine node name for object", "type", typ, "object", klog.KObj(obj))
}
// updateAllNodes requests an update of all nodes by attempting a send on
// updateAllNodesChan. The attempt never blocks: if the channel cannot take
// the signal immediately, the request is dropped.
func (c *nfdController) updateAllNodes() {
	var signal struct{}
	select {
	case c.updateAllNodesChan <- signal:
		// Request handed over.
	default:
		// Channel not ready; do not wait.
	}
}
// updateNodeFeatureGroup requests an update of the NodeFeatureGroup with the
// given name. It blocks until the name has been sent on
// updateNodeFeatureGroupChan or the stop channel becomes readable, whichever
// happens first.
func (c *nfdController) updateNodeFeatureGroup(nodeFeatureGroup string) {
	requests, stopped := c.updateNodeFeatureGroupChan, c.stopChan
	select {
	case <-stopped:
		// Controller stopped; give up on the request.
	case requests <- nodeFeatureGroup:
	}
}
// updateAllNodeFeatureGroups requests an update of all NodeFeatureGroups by
// attempting a send on updateAllNodeFeatureGroupsChan. The attempt never
// blocks: if the channel cannot take the signal immediately, the request is
// dropped.
func (c *nfdController) updateAllNodeFeatureGroups() {
	var signal struct{}
	select {
	case c.updateAllNodeFeatureGroupsChan <- signal:
		// Request handed over.
	default:
		// Channel not ready; do not wait.
	}
}