1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2025-03-17 05:48:21 +00:00

nfd-gc: use paging when listing CRs

List NodeFeature and NodeResourceTopology objects in pages of 200 items.
This reduces memory consumption and eliminates timeouts (on the
apiserver side) in large clusters with thousands of nodes.
This commit is contained in:
Markus Lehtonen 2024-07-26 17:05:14 +03:00
parent 57f1b79856
commit 45164f580a

View file

@ -26,6 +26,7 @@ import (
"k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
metadataclient "k8s.io/client-go/metadata" metadataclient "k8s.io/client-go/metadata"
"k8s.io/client-go/metadata/metadatainformer" "k8s.io/client-go/metadata/metadatainformer"
@ -152,37 +153,47 @@ func (n *nfdGarbageCollector) garbageCollect() {
nodeNames.Insert(meta.Name) nodeNames.Insert(meta.Name)
} }
// Handle NodeFeature objects listAndHandle := func(gvr schema.GroupVersionResource, handler func(metav1.PartialObjectMetadata)) {
objMetas, err := n.client.Resource(gvrNF).List(context.TODO(), metav1.ListOptions{}) opts := metav1.ListOptions{
Limit: 200,
}
for {
rsp, err := n.client.Resource(gvr).List(context.TODO(), opts)
if errors.IsNotFound(err) { if errors.IsNotFound(err) {
klog.V(2).InfoS("NodeFeature CRD does not exist") klog.V(2).InfoS("resource does not exist", "resource", gvr)
break
} else if err != nil { } else if err != nil {
klog.ErrorS(err, "failed to list NodeFeature objects") klog.ErrorS(err, "failed to list objects", "resource", gvr)
} else { break
for _, nf := range objMetas.Items {
nodeName, ok := nf.GetLabels()[nfdv1alpha1.NodeFeatureObjNodeNameLabel]
if !ok {
klog.InfoS("node name label missing from NodeFeature object", "nodefeature", klog.KObj(&nf))
} }
if !nodeNames.Has(nodeName) { for _, item := range rsp.Items {
n.deleteNodeFeature(nf.Namespace, nf.Name) handler(item)
} }
if rsp.ListMeta.Continue == "" {
break
}
opts.Continue = rsp.ListMeta.Continue
} }
} }
// Handle NodeFeature objects
listAndHandle(gvrNF, func(meta metav1.PartialObjectMetadata) {
nodeName, ok := meta.GetLabels()[nfdv1alpha1.NodeFeatureObjNodeNameLabel]
if !ok {
klog.InfoS("node name label missing from NodeFeature object", "nodefeature", klog.KObj(&meta))
}
if !nodeNames.Has(nodeName) {
n.deleteNodeFeature(meta.Namespace, meta.Name)
}
})
// Handle NodeResourceTopology objects // Handle NodeResourceTopology objects
objMetas, err = n.client.Resource(gvrNRT).List(context.TODO(), metav1.ListOptions{}) listAndHandle(gvrNRT, func(meta metav1.PartialObjectMetadata) {
if errors.IsNotFound(err) { if !nodeNames.Has(meta.Name) {
klog.V(2).InfoS("NodeResourceTopology CRD does not exist") n.deleteNRT(meta.Name)
} else if err != nil {
klog.ErrorS(err, "failed to list NodeResourceTopology objects")
} else {
for _, nrt := range objMetas.Items {
if !nodeNames.Has(nrt.Name) {
n.deleteNRT(nrt.Name)
}
}
} }
})
} }
// periodicGC runs garbage collector at every gcPeriod to make sure we haven't missed any node // periodicGC runs garbage collector at every gcPeriod to make sure we haven't missed any node