Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2025-03-10 10:47:20 +00:00
PiotrProkop 59afae50ba Add NodeResourceTopology garbage collector
The NodeResourceTopology (aka NRT) custom resource is used to enable NUMA-aware scheduling in Kubernetes.
As of now, node-feature-discovery daemons are used to advertise those
resources, but there is no service responsible for removing obsolete
objects (those without a corresponding Kubernetes node).

This patch adds a new daemon called nfd-topology-gc which removes old NRT objects.

Signed-off-by: PiotrProkop <pprokop@nvidia.com>
2023-01-11 10:15:21 +01:00

234 lines
6 KiB

Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
See the License for the specific language governing permissions and
limitations under the License.
package nfdtopologygarbagecollector
import (
nrtapi "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
v1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
faketopologyv1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned/fake"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
fakek8sclientset "k8s.io/client-go/kubernetes/fake"
. "github.com/smartystreets/goconvey/convey"
// TestNRTGC exercises topologyGC against fake Kubernetes and
// NodeResourceTopology (NRT) clientsets. It contains four Convey scenarios:
// an NRT with no node in the cluster, a mix of one matching and one stale NRT,
// a node deletion after startup, and a periodic GC pass.
//
// NOTE(review): this copy of the file appears to have lost lines (closing
// braces and some composite-literal openers are not visible) — confirm every
// detail below against the upstream file before relying on it.
func TestNRTGC(t *testing.T) {
// Scenario 1: the fake Kubernetes clientset is created with no objects, while
// the fake topology clientset is seeded with an NRT named "node1".
Convey("When theres is old NRT ", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset()
fakeClient := faketopologyv1alpha1.NewSimpleClientset(&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
// The informer factory is built on the fake Kubernetes client with a
// 5-minute resync period.
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
stopChan := make(chan struct{}, 1)
// gcPeriod is set to 10 minutes here, far longer than the test itself.
gc := &topologyGC{
factory: factory,
topoClient: fakeClient,
stopChan: stopChan,
gcPeriod: 10 * time.Minute,
err := gc.run()
So(err, ShouldBeNil)
// After run() returns, listing NRTs through the fake client must yield nothing.
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
So(nrts.Items, ShouldHaveLength, 0)
// Scenario 2: the cluster has a Node named "node1"; the topology clientset is
// seeded with objects named "node1" and "node2".
Convey("When theres is one old NRT and one up to date", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset(&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
fakeClient := faketopologyv1alpha1.NewSimpleClientset(&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
// NOTE(review): the opener of this second object is not visible here;
// presumably another NodeResourceTopology — confirm.
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
stopChan := make(chan struct{}, 1)
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
gc := &topologyGC{
factory: factory,
topoClient: fakeClient,
stopChan: stopChan,
gcPeriod: 10 * time.Minute,
err := gc.run()
So(err, ShouldBeNil)
// Exactly one NRT must remain after run(), and it must be the one named "node1".
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
So(nrts.Items, ShouldHaveLength, 1)
So(nrts.Items[0].GetName(), ShouldEqual, "node1")
// Scenario 3: both clientsets are seeded with objects named "node1" and
// "node2"; "node1" is then deleted from the Kubernetes clientset.
// NOTE(review): the object type openers are not visible in this copy;
// presumably corev1.Node and NodeResourceTopology respectively — confirm.
Convey("Should react to delete event", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset(
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
fakeClient := faketopologyv1alpha1.NewSimpleClientset(
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
stopChan := make(chan struct{}, 1)
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
gc := &topologyGC{
factory: factory,
topoClient: fakeClient,
stopChan: stopChan,
gcPeriod: 10 * time.Minute,
err := gc.run()
So(err, ShouldBeNil)
// Both NRTs are expected to still be present right after run().
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
So(nrts.Items, ShouldHaveLength, 2)
// Delete "node1" through the fake Kubernetes client.
err = k8sClient.CoreV1().Nodes().Delete(context.TODO(), "node1", metav1.DeleteOptions{})
So(err, ShouldBeNil)
// simple sleep with retry loop to make sure indexer will pick up event and trigger deleteNode Function
// The loop lists NRTs up to 5 times and records success once one item is left.
// NOTE(review): no sleep or break is visible inside this loop in this copy —
// confirm against the upstream file.
deleted := false
for i := 0; i < 5; i++ {
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
if len(nrts.Items) == 1 {
deleted = true
So(deleted, ShouldBeTrue)
// Scenario 4: both clientsets are seeded with objects named "node1" and
// "node2"; an extra NRT named "not-existing" is created after startup and the
// test waits for the count to return to 2.
// NOTE(review): the object type openers are not visible in this copy — confirm.
Convey("periodic GC should remove obsolete NRT", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset(
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
fakeClient := faketopologyv1alpha1.NewSimpleClientset(
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
stopChan := make(chan struct{}, 1)
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
// Unlike the other scenarios, gcPeriod is one second here.
gc := &topologyGC{
factory: factory,
topoClient: fakeClient,
stopChan: stopChan,
gcPeriod: time.Second,
err := gc.run()
So(err, ShouldBeNil)
// Both seeded NRTs are expected to be present right after run().
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
So(nrts.Items, ShouldHaveLength, 2)
// An NRT whose name matches neither seeded node name.
nrt := v1alpha1.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "not-existing",
// periodicGC is started in its own goroutine with a one-second argument,
// then the extra NRT is created through the fake topology client.
// NOTE(review): nothing visible here stops this goroutine or closes
// stopChan — confirm how it terminates.
go gc.periodicGC(time.Second)
_, err = fakeClient.TopologyV1alpha1().NodeResourceTopologies().Create(context.TODO(), &nrt, metav1.CreateOptions{})
So(err, ShouldBeNil)
// simple sleep with retry loop to make sure GC was triggered
// The loop lists NRTs up to 5 times and records success once the count is
// back to 2; a 2-second sleep is visible in the loop.
// NOTE(review): no break is visible in this copy — confirm against upstream.
deleted := false
for i := 0; i < 5; i++ {
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
if len(nrts.Items) == 2 {
deleted = true
time.Sleep(2 * time.Second)
So(deleted, ShouldBeTrue)