
Merge pull request from PiotrProkop/nrt-garbage-collector

Add NRT garbage collector
Kubernetes Prow Robot 2023-01-11 01:59:44 -08:00 committed by GitHub
commit ea921a8b14
25 changed files with 923 additions and 3 deletions

@@ -0,0 +1,88 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"flag"
"fmt"
"os"
"time"
"k8s.io/klog/v2"
nfdtopologygarbagecollector "sigs.k8s.io/node-feature-discovery/pkg/nfd-topology-gc"
"sigs.k8s.io/node-feature-discovery/pkg/version"
)
const (
// ProgramName is the canonical name of this program
ProgramName = "nfd-topology-gc"
)
func main() {
flags := flag.NewFlagSet(ProgramName, flag.ExitOnError)
printVersion := flags.Bool("version", false, "Print version and exit.")
args := parseArgs(flags, os.Args[1:]...)
if *printVersion {
fmt.Println(ProgramName, version.Get())
os.Exit(0)
}
// Assert that the version is known
if version.Undefined() {
klog.Warningf("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
}
// Get new TopologyGC instance
gc, err := nfdtopologygarbagecollector.New(args)
if err != nil {
klog.Exit(err)
}
if err = gc.Run(); err != nil {
klog.Exit(err)
}
}
func parseArgs(flags *flag.FlagSet, osArgs ...string) *nfdtopologygarbagecollector.Args {
args := initFlags(flags)
_ = flags.Parse(osArgs)
if len(flags.Args()) > 0 {
fmt.Fprintf(flags.Output(), "unknown command line argument: %s\n", flags.Args()[0])
flags.Usage()
os.Exit(2)
}
return args
}
func initFlags(flagset *flag.FlagSet) *nfdtopologygarbagecollector.Args {
args := &nfdtopologygarbagecollector.Args{}
flagset.DurationVar(&args.GCPeriod, "gc-interval", time.Hour,
"Interval at which the garbage collector cleans up any missed, already obsolete NodeResourceTopology objects. [Default: 1h]")
flagset.StringVar(&args.Kubeconfig, "kubeconfig", "",
"Kubeconfig to use")
klog.InitFlags(flagset)
return args
}

@@ -0,0 +1,41 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"flag"
"testing"
"time"
. "github.com/smartystreets/goconvey/convey"
)
func TestArgsParse(t *testing.T) {
Convey("When parsing command line arguments", t, func() {
flags := flag.NewFlagSet(ProgramName, flag.ExitOnError)
Convey("When valid -gc-interval is specified", func() {
args := parseArgs(flags,
"-gc-interval=30s")
Convey("args.GCPeriod is set to appropriate values", func() {
So(args.GCPeriod, ShouldEqual, 30*time.Second)
})
})
})
}

@@ -0,0 +1,9 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: node-feature-discovery
resources:
- topology-gc-clusterrole.yaml
- topology-gc-clusterrolebinding.yaml
- topology-gc-serviceaccount.yaml

@@ -0,0 +1,25 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: nfd-topology-gc
rules:
- apiGroups:
- ""
resources:
- nodes
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- nodes/proxy
verbs:
- get
- apiGroups:
- topology.node.k8s.io
resources:
- noderesourcetopologies
verbs:
- delete
- list

@@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: nfd-topology-gc
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: nfd-topology-gc
subjects:
- kind: ServiceAccount
name: nfd-topology-gc
namespace: default

@@ -0,0 +1,4 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: nfd-topology-gc

@@ -0,0 +1,7 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: node-feature-discovery
resources:
- topology-gc.yaml

@@ -0,0 +1,23 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: nfd
name: nfd-topology-gc
spec:
selector:
matchLabels:
app: nfd-topology-gc
template:
metadata:
labels:
app: nfd-topology-gc
spec:
dnsPolicy: ClusterFirstWithHostNet
serviceAccountName: nfd-topology-gc
containers:
- name: nfd-topology-gc
image: gcr.io/k8s-staging-nfd/node-feature-discovery:master
imagePullPolicy: Always
command:
- "nfd-topology-gc"

@@ -94,3 +94,14 @@ Create the name of the service account which topologyUpdater will use
{{ default "default" .Values.topologyUpdater.serviceAccount.name }}
{{- end -}}
{{- end -}}
{{/*
Create the name of the service account which topologyGC will use
*/}}
{{- define "node-feature-discovery.topologyGC.serviceAccountName" -}}
{{- if .Values.topologyGC.serviceAccount.create -}}
{{ default (printf "%s-topology-gc" (include "node-feature-discovery.fullname" .)) .Values.topologyGC.serviceAccount.name }}
{{- else -}}
{{ default "default" .Values.topologyGC.serviceAccount.name }}
{{- end -}}
{{- end -}}

@@ -66,3 +66,34 @@ rules:
- get
- update
{{- end }}
---
{{- if and .Values.topologyGC.enable .Values.topologyGC.rbac.create .Values.topologyUpdater.enable }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
labels:
{{- include "node-feature-discovery.labels" . | nindent 4 }}
rules:
- apiGroups:
- ""
resources:
- nodes
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- nodes/proxy
verbs:
- get
- apiGroups:
- topology.node.k8s.io
resources:
- noderesourcetopologies
verbs:
- delete
- list
{{- end }}

@@ -32,3 +32,21 @@ subjects:
name: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
namespace: {{ include "node-feature-discovery.namespace" . }}
{{- end }}
---
{{- if and .Values.topologyGC.enable .Values.topologyGC.rbac.create .Values.topologyUpdater.enable }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
labels:
{{- include "node-feature-discovery.labels" . | nindent 4 }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
subjects:
- kind: ServiceAccount
name: {{ include "node-feature-discovery.topologyGC.serviceAccountName" . }}
namespace: {{ include "node-feature-discovery.namespace" . }}
{{- end }}

@@ -27,6 +27,21 @@ metadata:
{{- end }}
{{- end }}
---
{{- if and .Values.topologyGC.enable .Values.topologyGC.serviceAccount.create .Values.topologyUpdater.enable }}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "node-feature-discovery.topologyGC.serviceAccountName" . }}
namespace: {{ include "node-feature-discovery.namespace" . }}
labels:
{{- include "node-feature-discovery.labels" . | nindent 4 }}
{{- with .Values.topologyGC.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}
---
{{- if .Values.worker.serviceAccount.create }}
apiVersion: v1

@@ -0,0 +1,64 @@
{{- if and .Values.topologyGC.enable .Values.topologyUpdater.enable -}}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
namespace: {{ include "node-feature-discovery.namespace" . }}
labels:
{{- include "node-feature-discovery.labels" . | nindent 4 }}
role: topology-gc
spec:
replicas: {{ .Values.topologyGC.replicaCount | default 1 }}
selector:
matchLabels:
{{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
role: topology-gc
template:
metadata:
labels:
{{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
role: topology-gc
annotations:
{{- toYaml .Values.topologyGC.annotations | nindent 8 }}
spec:
serviceAccountName: {{ include "node-feature-discovery.topologyGC.serviceAccountName" . }}
dnsPolicy: ClusterFirstWithHostNet
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
securityContext:
{{- toYaml .Values.topologyGC.podSecurityContext | nindent 8 }}
containers:
- name: topology-gc
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: "{{ .Values.image.pullPolicy }}"
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
command:
- "nfd-topology-gc"
args:
{{- if .Values.topologyGC.interval | empty | not }}
- "-gc-interval={{ .Values.topologyGC.interval }}"
{{- end }}
resources:
{{- toYaml .Values.topologyGC.resources | nindent 12 }}
securityContext:
{{- toYaml .Values.topologyGC.securityContext | nindent 12 }}
{{- with .Values.topologyGC.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.topologyGC.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.topologyGC.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

@@ -422,6 +422,44 @@ topologyUpdater:
annotations: {}
affinity: {}
topologyGC:
enable: true
replicaCount: 1
serviceAccount:
create: true
annotations: {}
name:
rbac:
create: true
interval: 1h
podSecurityContext: {}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop: [ "ALL" ]
readOnlyRootFilesystem: true
runAsNonRoot: true
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
nodeSelector: {}
tolerations: []
annotations: {}
affinity: {}
# Optionally use encryption for worker <--> master comms
# TODO: verify hostname is not yet supported
#

@@ -6,11 +6,13 @@ namespace: node-feature-discovery
bases:
- ../../base/rbac
- ../../base/rbac-topologyupdater
- ../../base/rbac-topology-gc
- ../../base/nfd-crds
- ../../base/master
- ../../base/worker-daemonset
- ../../base/noderesourcetopologies-crd
- ../../base/topologyupdater-daemonset
- ../../base/topology-gc
resources:
- namespace.yaml

@@ -5,8 +5,10 @@ namespace: node-feature-discovery
bases:
- ../../base/rbac-topologyupdater
- ../../base/rbac-topology-gc
- ../../base/noderesourcetopologies-crd
- ../../base/topologyupdater-daemonset
- ../../base/topology-gc
resources:
- namespace.yaml

@@ -173,5 +173,24 @@ We have introduced the following Chart parameters.
| `topologyUpdater.affinity` | dict | {} | Topology updater pod [affinity](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/) |
| `topologyUpdater.config` | dict | | [configuration](../reference/topology-updater-configuration-reference) |
### Topology garbage collector parameters
| Name | Type | Default | description |
|-----------------------------------------------|--------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `topologyGC.*` | dict | | NFD Topology Garbage Collector configuration |
| `topologyGC.enable` | bool | true | Specifies whether the NFD Topology Garbage Collector should be created |
| `topologyGC.serviceAccount.create` | bool | true | Specifies whether the service account for topology garbage collector should be created |
| `topologyGC.serviceAccount.annotations` | dict | {} | Annotations to add to the service account for topology garbage collector |
| `topologyGC.serviceAccount.name` | string | | The name of the service account for topology garbage collector to use. If not set and create is true, a name is generated using the fullname template and `-topology-gc` suffix |
| `topologyGC.rbac.create` | bool | true | Specifies whether to create [RBAC][rbac] configuration for topology garbage collector |
| `topologyGC.interval` | string | 1h | Time between periodic garbage collector runs |
| `topologyGC.podSecurityContext` | dict | {} | [PodSecurityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) holds pod-level security attributes and common container settings |
| `topologyGC.securityContext` | dict | {} | Container [security settings](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) |
| `topologyGC.resources` | dict | {} | Topology garbage collector pod [resources management](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) |
| `topologyGC.nodeSelector` | dict | {} | Topology garbage collector pod [node selector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) |
| `topologyGC.tolerations` | list | [] | Topology garbage collector pod [node tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) |
| `topologyGC.annotations` | dict | {} | Topology garbage collector pod [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) |
| `topologyGC.affinity` | dict | {} | Topology garbage collector pod [affinity](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/) |
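As an illustration, these parameters can be set at install or upgrade time; a
sketch with placeholder release and chart names:
```bash
helm upgrade --install node-feature-discovery <nfd-chart> \
  --namespace node-feature-discovery \
  --set topologyUpdater.enable=true \
  --set topologyGC.enable=true \
  --set topologyGC.interval=30m
```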
<!-- Links -->
[rbac]: https://kubernetes.io/docs/reference/access-authn-authz/rbac/

@@ -47,6 +47,14 @@ creates or updates a
resource object specific to this node. One instance of nfd-topology-updater is
supposed to be running on each node of the cluster.
## NFD-Topology-Garbage-Collector
NFD-Topology-Garbage-Collector is a daemon responsible for cleaning up obsolete
[NodeResourceTopology](../usage/custom-resources#noderesourcetopology) objects;
an object is obsolete when its corresponding worker node no longer exists.
One instance of nfd-topology-gc is supposed to be running in the cluster.
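After deployment, a quick sanity check (an illustrative snippet; the
`noderesourcetopologies` resource name comes from the NRT CRD referenced above)
is to compare the remaining objects against the cluster's nodes:
```bash
# Every remaining NodeResourceTopology object should match an existing node.
kubectl get noderesourcetopologies
kubectl get nodes
```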
## Feature Discovery
Feature discovery is divided into domain-specific feature sources:

@@ -0,0 +1,46 @@
---
title: "Topology Garbage Collector Cmdline Reference"
layout: default
sort: 6
---
# NFD-Topology-Garbage-Collector Commandline Flags
{: .no_toc }
## Table of Contents
{: .no_toc .text-delta }
1. TOC
{:toc}
---
To quickly view available command line flags, execute `nfd-topology-gc -help`.
In a docker container:
```bash
docker run {{ site.container_image }} \
nfd-topology-gc -help
```
### -h, -help
Print usage and exit.
### -version
Print version and exit.
### -gc-interval
The `-gc-interval` flag specifies the interval between periodic garbage collector runs.
Default: 1h
Example:
```bash
nfd-topology-gc -gc-interval=1h
```

@@ -1,7 +1,7 @@
---
title: "CRDs"
layout: default
sort: 6
sort: 7
---
# Custom Resources

@@ -1,7 +1,7 @@
---
title: "Customization guide"
layout: default
sort: 7
sort: 8
---
# Customization guide

@@ -1,7 +1,7 @@
---
title: "Examples and demos"
layout: default
sort: 8
sort: 9
---
# Examples and demos

@@ -0,0 +1,29 @@
---
title: "NFD-Topology-Garbage-Collector"
layout: default
sort: 6
---
# NFD-Topology-Garbage-Collector
{: .no_toc}
---
NFD-Topology-Garbage-Collector is preferably run as a Kubernetes deployment
with one replica. It makes sure that all
[NodeResourceTopology](custom-resources#noderesourcetopology) objects
have corresponding worker nodes and removes stale objects for worker nodes
that are no longer part of the Kubernetes cluster.
This service watches for Node deletion events and removes the corresponding
NodeResourceTopology objects. It also runs periodically to make sure no
deletion event was missed and that no NodeResourceTopology object was created
without a corresponding worker node. The default garbage collector interval
is 1h, used when no `-gc-interval` is specified.
## Topology-Garbage-Collector Configuration
In Helm deployments
(see [Topology Garbage Collector](../deployment/helm.md#topology-garbage-collector-parameters)
for the available parameters), NFD-Topology-Garbage-Collector is deployed
only when `topologyUpdater.enable` is set to `true`.
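As an illustration, a minimal `values.yaml` sketch that enables the collector
with a shorter interval (parameter names are taken from this chart; the 30m
value is only an example):
```yaml
topologyUpdater:
  enable: true   # the garbage collector is deployed only alongside the topology updater
topologyGC:
  enable: true
  interval: 30m  # overrides the default 1h garbage collection period
```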

@@ -0,0 +1,194 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nfdtopologygarbagecollector
import (
"context"
"time"
topologyclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"sigs.k8s.io/node-feature-discovery/pkg/apihelper"
)
// Args are the command line arguments
type Args struct {
GCPeriod time.Duration
Kubeconfig string
}
type TopologyGC interface {
Run() error
Stop()
}
type topologyGC struct {
stopChan chan struct{}
topoClient topologyclientset.Interface
gcPeriod time.Duration
factory informers.SharedInformerFactory
}
func New(args *Args) (TopologyGC, error) {
kubeconfig, err := apihelper.GetKubeconfig(args.Kubeconfig)
if err != nil {
return nil, err
}
stop := make(chan struct{})
return newTopologyGC(kubeconfig, stop, args.GCPeriod)
}
func newTopologyGC(config *restclient.Config, stop chan struct{}, gcPeriod time.Duration) (*topologyGC, error) {
helper := apihelper.K8sHelpers{Kubeconfig: config}
cli, err := helper.GetTopologyClient()
if err != nil {
return nil, err
}
clientset := kubernetes.NewForConfigOrDie(config)
factory := informers.NewSharedInformerFactory(clientset, 5*time.Minute)
return &topologyGC{
topoClient: cli,
stopChan: stop,
gcPeriod: gcPeriod,
factory: factory,
}, nil
}
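// deleteNRT removes the NodeResourceTopology object named after the given
// node; NRT objects are cluster-scoped and share their node's name.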
func (n *topologyGC) deleteNRT(nodeName string) {
if err := n.topoClient.TopologyV1alpha1().NodeResourceTopologies().Delete(context.TODO(), nodeName, metav1.DeleteOptions{}); err != nil {
if errors.IsNotFound(err) {
klog.V(2).Infof("NodeResourceTopology for node %s not found, skipping deletion", nodeName)
} else {
klog.Warningf("failed to delete NodeResourceTopology for node %s: %s", nodeName, err.Error())
}
return
}
klog.Infof("NodeResourceTopology for node %s has been deleted", nodeName)
}
func (n *topologyGC) deleteNodeHandler(object interface{}) {
// Handle the case where the deletion was observed as a
// DeletedFinalStateUnknown tombstone, e.g. when the delete event was
// missed while the watch was disconnected.
obj := object
if deletedFinalStateUnknown, ok := object.(cache.DeletedFinalStateUnknown); ok {
klog.V(2).Infof("found DeletedFinalStateUnknown tombstone for object: %v", object)
obj = deletedFinalStateUnknown.Obj
}
node, ok := obj.(*corev1.Node)
if !ok {
klog.Errorf("cannot convert %v to v1.Node", object)
return
}
n.deleteNRT(node.GetName())
}
func (n *topologyGC) runGC() {
klog.Infof("Running GC")
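// Build the set of node names currently known to the informer cache.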
objects := n.factory.Core().V1().Nodes().Informer().GetIndexer().List()
nodes := sets.NewString()
for _, object := range objects {
key, err := cache.MetaNamespaceKeyFunc(object)
if err != nil {
klog.Warningf("cannot create key for %v: %s", object, err.Error())
continue
}
nodes.Insert(key)
}
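// List all NRT objects and delete any whose key has no matching node;
// for cluster-scoped objects the cache key is simply the object name.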
nrts, err := n.topoClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
if err != nil {
klog.Warningf("cannot list NRTs %s", err.Error())
return
}
for _, nrt := range nrts.Items {
key, err := cache.MetaNamespaceKeyFunc(&nrt)
if err != nil {
klog.Warningf("cannot create key for %v: %s", nrt, err.Error())
continue
}
if !nodes.Has(key) {
n.deleteNRT(key)
}
}
}
// periodicGC runs the garbage collector every gcPeriod to make sure no missed or orphaned NodeResourceTopology object is left behind
func (n *topologyGC) periodicGC(gcPeriod time.Duration) {
gcTrigger := time.NewTicker(gcPeriod)
defer gcTrigger.Stop()
for {
select {
case <-gcTrigger.C:
n.runGC()
case <-n.stopChan:
klog.Infof("shutting down periodic Garbage Collector")
return
}
}
}
func (n *topologyGC) run() error {
nodeInformer := n.factory.Core().V1().Nodes().Informer()
if _, err := nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
DeleteFunc: n.deleteNodeHandler,
}); err != nil {
return err
}
// start informers
n.factory.Start(n.stopChan)
n.factory.WaitForCacheSync(n.stopChan)
n.runGC()
return nil
}
// Run is a blocking function that removes stale NRT objects when a Node is
// deleted and runs periodic GC to make sure any obsolete objects are removed.
func (n *topologyGC) Run() error {
if err := n.run(); err != nil {
return err
}
// run periodic GC
n.periodicGC(n.gcPeriod)
return nil
}
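// Stop signals the garbage collector to shut down; the non-blocking send
// avoids getting stuck if the GC loop has already exited.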
func (n *topologyGC) Stop() {
select {
case n.stopChan <- struct{}{}:
default:
}
}

@@ -0,0 +1,234 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nfdtopologygarbagecollector
import (
"context"
"testing"
"time"
nrtapi "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
v1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
faketopologyv1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned/fake"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers"
fakek8sclientset "k8s.io/client-go/kubernetes/fake"
. "github.com/smartystreets/goconvey/convey"
)
func TestNRTGC(t *testing.T) {
Convey("When theres is old NRT ", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset()
fakeClient := faketopologyv1alpha1.NewSimpleClientset(&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
})
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
stopChan := make(chan struct{}, 1)
gc := &topologyGC{
factory: factory,
topoClient: fakeClient,
stopChan: stopChan,
gcPeriod: 10 * time.Minute,
}
err := gc.run()
So(err, ShouldBeNil)
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
So(nrts.Items, ShouldHaveLength, 0)
gc.Stop()
})
Convey("When theres is one old NRT and one up to date", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset(&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
})
fakeClient := faketopologyv1alpha1.NewSimpleClientset(&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
},
&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
},
},
)
stopChan := make(chan struct{}, 1)
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
gc := &topologyGC{
factory: factory,
topoClient: fakeClient,
stopChan: stopChan,
gcPeriod: 10 * time.Minute,
}
err := gc.run()
So(err, ShouldBeNil)
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
So(nrts.Items, ShouldHaveLength, 1)
So(nrts.Items[0].GetName(), ShouldEqual, "node1")
})
Convey("Should react to delete event", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset(
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
},
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
},
},
)
fakeClient := faketopologyv1alpha1.NewSimpleClientset(
&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
},
&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
},
},
)
stopChan := make(chan struct{}, 1)
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
gc := &topologyGC{
factory: factory,
topoClient: fakeClient,
stopChan: stopChan,
gcPeriod: 10 * time.Minute,
}
err := gc.run()
So(err, ShouldBeNil)
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
So(nrts.Items, ShouldHaveLength, 2)
err = k8sClient.CoreV1().Nodes().Delete(context.TODO(), "node1", metav1.DeleteOptions{})
So(err, ShouldBeNil)
// simple sleep-and-retry loop to make sure the informer picks up the delete event and triggers the delete handler
deleted := false
for i := 0; i < 5; i++ {
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
if len(nrts.Items) == 1 {
deleted = true
break
}
time.Sleep(time.Second)
}
So(deleted, ShouldBeTrue)
})
Convey("periodic GC should remove obsolete NRT", t, func() {
k8sClient := fakek8sclientset.NewSimpleClientset(
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
},
&corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
},
},
)
fakeClient := faketopologyv1alpha1.NewSimpleClientset(
&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
},
&nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
},
},
)
stopChan := make(chan struct{}, 1)
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
gc := &topologyGC{
factory: factory,
topoClient: fakeClient,
stopChan: stopChan,
gcPeriod: time.Second,
}
err := gc.run()
So(err, ShouldBeNil)
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
So(nrts.Items, ShouldHaveLength, 2)
nrt := nrtapi.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "not-existing",
},
}
go gc.periodicGC(time.Second)
_, err = fakeClient.TopologyV1alpha1().NodeResourceTopologies().Create(context.TODO(), &nrt, metav1.CreateOptions{})
So(err, ShouldBeNil)
// simple sleep-and-retry loop to make sure the periodic GC was triggered
deleted := false
for i := 0; i < 5; i++ {
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
So(err, ShouldBeNil)
if len(nrts.Items) == 2 {
deleted = true
break
}
time.Sleep(2 * time.Second)
}
So(deleted, ShouldBeTrue)
})
}