2023-01-03 13:23:46 +01:00
|
|
|
/*
|
|
|
|
Copyright 2023 The Kubernetes Authors.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
package nfdgarbagecollector
|
2023-01-03 13:23:46 +01:00
|
|
|
|
|
|
|
import (
	"context"
	"fmt"
	"time"

	topologyclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	restclient "k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
	"k8s.io/klog/v2"

	"sigs.k8s.io/node-feature-discovery/pkg/apihelper"
)
|
|
|
|
|
|
|
|
// Args holds the command line arguments of the garbage collector.
type Args struct {
	// GCPeriod is the period that New hands to the collector for its
	// periodic garbage collection.
	GCPeriod time.Duration

	// Kubeconfig is the value New passes to apihelper.GetKubeconfig.
	Kubeconfig string
}
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
// NfdGarbageCollector is the interface returned by New for controlling the
// garbage collector.
type NfdGarbageCollector interface {
	// Run starts the collector. The implementation in this file blocks
	// until Stop is called, or returns early with an error if startup fails.
	Run() error
	// Stop signals a running collector to shut down.
	Stop()
}
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
type nfdGarbageCollector struct {
|
2023-01-03 13:23:46 +01:00
|
|
|
stopChan chan struct{}
|
|
|
|
topoClient topologyclientset.Interface
|
|
|
|
gcPeriod time.Duration
|
|
|
|
factory informers.SharedInformerFactory
|
|
|
|
}
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
func New(args *Args) (NfdGarbageCollector, error) {
|
2023-01-03 13:23:46 +01:00
|
|
|
kubeconfig, err := apihelper.GetKubeconfig(args.Kubeconfig)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
stop := make(chan struct{})
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
return newNfdGarbageCollector(kubeconfig, stop, args.GCPeriod)
|
2023-01-03 13:23:46 +01:00
|
|
|
}
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
func newNfdGarbageCollector(config *restclient.Config, stop chan struct{}, gcPeriod time.Duration) (*nfdGarbageCollector, error) {
|
2023-01-03 13:23:46 +01:00
|
|
|
helper := apihelper.K8sHelpers{Kubeconfig: config}
|
|
|
|
cli, err := helper.GetTopologyClient()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
clientset := kubernetes.NewForConfigOrDie(config)
|
|
|
|
factory := informers.NewSharedInformerFactory(clientset, 5*time.Minute)
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
return &nfdGarbageCollector{
|
2023-01-03 13:23:46 +01:00
|
|
|
topoClient: cli,
|
|
|
|
stopChan: stop,
|
|
|
|
gcPeriod: gcPeriod,
|
|
|
|
factory: factory,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
func (n *nfdGarbageCollector) deleteNRT(nodeName string) {
|
2023-02-07 16:48:01 +01:00
|
|
|
if err := n.topoClient.TopologyV1alpha2().NodeResourceTopologies().Delete(context.TODO(), nodeName, metav1.DeleteOptions{}); err != nil {
|
2023-01-03 13:23:46 +01:00
|
|
|
if errors.IsNotFound(err) {
|
2023-05-03 11:32:53 +03:00
|
|
|
klog.V(2).InfoS("NodeResourceTopology not found, omitting deletion", "nodeName", nodeName)
|
2023-01-03 13:23:46 +01:00
|
|
|
return
|
|
|
|
} else {
|
2023-05-03 11:32:53 +03:00
|
|
|
klog.ErrorS(err, "failed to delete NodeResourceTopology object", "nodeName", nodeName)
|
2023-01-03 13:23:46 +01:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
2023-05-03 11:32:53 +03:00
|
|
|
klog.InfoS("NodeResourceTopology object has been deleted", "nodeName", nodeName)
|
2023-01-03 13:23:46 +01:00
|
|
|
}
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
func (n *nfdGarbageCollector) deleteNodeHandler(object interface{}) {
|
2023-01-03 13:23:46 +01:00
|
|
|
// handle a case when we are starting up and need to clear stale NRT resources
|
|
|
|
obj := object
|
|
|
|
if deletedFinalStateUnknown, ok := object.(cache.DeletedFinalStateUnknown); ok {
|
2023-05-03 11:32:53 +03:00
|
|
|
klog.V(2).InfoS("found stale NodeResourceTopology object", "object", object)
|
2023-01-03 13:23:46 +01:00
|
|
|
obj = deletedFinalStateUnknown.Obj
|
|
|
|
}
|
|
|
|
|
|
|
|
node, ok := obj.(*corev1.Node)
|
|
|
|
if !ok {
|
2023-05-03 11:32:53 +03:00
|
|
|
klog.InfoS("cannot convert object to v1.Node", "object", object)
|
2023-01-03 13:23:46 +01:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
n.deleteNRT(node.GetName())
|
|
|
|
}
|
|
|
|
|
2023-08-17 18:09:14 +03:00
|
|
|
// garbageCollect removes all stale API objects
|
2023-08-17 16:53:06 +03:00
|
|
|
func (n *nfdGarbageCollector) garbageCollect() {
|
2023-08-17 18:09:14 +03:00
|
|
|
klog.InfoS("performing garbage collection")
|
2023-08-17 18:16:36 +03:00
|
|
|
nodes, err := n.factory.Core().V1().Nodes().Lister().List(labels.Everything())
|
|
|
|
if err != nil {
|
|
|
|
klog.ErrorS(err, "failed to list Node objects")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
nodeNames := sets.NewString()
|
|
|
|
for _, node := range nodes {
|
|
|
|
nodeNames.Insert(node.Name)
|
2023-01-03 13:23:46 +01:00
|
|
|
}
|
|
|
|
|
2023-02-07 16:48:01 +01:00
|
|
|
nrts, err := n.topoClient.TopologyV1alpha2().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
|
2023-01-03 13:23:46 +01:00
|
|
|
if err != nil {
|
2023-05-03 11:32:53 +03:00
|
|
|
klog.ErrorS(err, "failed to list NodeResourceTopology objects")
|
2023-01-03 13:23:46 +01:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, nrt := range nrts.Items {
|
|
|
|
key, err := cache.MetaNamespaceKeyFunc(&nrt)
|
|
|
|
if err != nil {
|
2023-05-03 11:32:53 +03:00
|
|
|
klog.ErrorS(err, "failed to create key", "noderesourcetopology", klog.KObj(&nrt))
|
2023-01-03 13:23:46 +01:00
|
|
|
continue
|
|
|
|
}
|
2023-08-17 18:16:36 +03:00
|
|
|
if !nodeNames.Has(key) {
|
2023-01-03 13:23:46 +01:00
|
|
|
n.deleteNRT(key)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// periodicGC runs garbage collector at every gcPeriod to make sure we haven't missed any node
|
2023-08-17 16:53:06 +03:00
|
|
|
func (n *nfdGarbageCollector) periodicGC(gcPeriod time.Duration) {
|
2023-08-17 17:58:16 +03:00
|
|
|
// Do initial round of garbage collection at startup time
|
|
|
|
n.garbageCollect()
|
|
|
|
|
2023-01-03 13:23:46 +01:00
|
|
|
gcTrigger := time.NewTicker(gcPeriod)
|
2023-07-04 13:57:10 +08:00
|
|
|
defer gcTrigger.Stop()
|
2023-01-03 13:23:46 +01:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-gcTrigger.C:
|
2023-08-17 18:09:14 +03:00
|
|
|
n.garbageCollect()
|
2023-01-03 13:23:46 +01:00
|
|
|
case <-n.stopChan:
|
2023-05-03 11:32:53 +03:00
|
|
|
klog.InfoS("shutting down periodic Garbage Collector")
|
2023-01-03 13:23:46 +01:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
func (n *nfdGarbageCollector) startNodeInformer() error {
|
2023-01-03 13:23:46 +01:00
|
|
|
nodeInformer := n.factory.Core().V1().Nodes().Informer()
|
|
|
|
|
|
|
|
if _, err := nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
|
|
|
|
DeleteFunc: n.deleteNodeHandler,
|
|
|
|
}); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// start informers
|
|
|
|
n.factory.Start(n.stopChan)
|
|
|
|
n.factory.WaitForCacheSync(n.stopChan)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Run is a blocking function that removes stale NRT objects when Node is deleted and runs periodic GC to make sure any obsolete objects are removed
|
2023-08-17 16:53:06 +03:00
|
|
|
func (n *nfdGarbageCollector) Run() error {
|
2023-08-17 17:48:21 +03:00
|
|
|
if err := n.startNodeInformer(); err != nil {
|
2023-01-03 13:23:46 +01:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
// run periodic GC
|
|
|
|
n.periodicGC(n.gcPeriod)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-08-17 16:53:06 +03:00
|
|
|
func (n *nfdGarbageCollector) Stop() {
|
2023-08-18 16:23:36 +03:00
|
|
|
close(n.stopChan)
|
2023-01-03 13:23:46 +01:00
|
|
|
}
|