mirror of
https://github.com/kubernetes-sigs/node-feature-discovery.git
synced 2024-12-14 11:57:51 +00:00
Add NodeResourceTopology garbage collector
NodeResourceTopology (aka NRT) custom resource is used to enable NUMA-aware scheduling in Kubernetes. As of now, node-feature-discovery daemons are used to advertise those resources, but there is no service responsible for removing obsolete objects (those without a corresponding Kubernetes node). This patch adds a new daemon called nfd-topology-gc which removes old NRTs. Signed-off-by: PiotrProkop <pprokop@nvidia.com>
This commit is contained in:
parent
0159ab04e7
commit
59afae50ba
18 changed files with 818 additions and 0 deletions
88
cmd/nfd-topology-gc/main.go
Normal file
88
cmd/nfd-topology-gc/main.go
Normal file
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
Copyright 2023 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
nfdtopologygarbagecollector "sigs.k8s.io/node-feature-discovery/pkg/nfd-topology-gc"
|
||||
"sigs.k8s.io/node-feature-discovery/pkg/version"
|
||||
)
|
||||
|
||||
const (
	// ProgramName is the canonical name of this program
	ProgramName = "nfd-topology-gc"
)
|
||||
|
||||
func main() {
|
||||
flags := flag.NewFlagSet(ProgramName, flag.ExitOnError)
|
||||
|
||||
printVersion := flags.Bool("version", false, "Print version and exit.")
|
||||
|
||||
args := parseArgs(flags, os.Args[1:]...)
|
||||
|
||||
if *printVersion {
|
||||
fmt.Println(ProgramName, version.Get())
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// Assert that the version is known
|
||||
if version.Undefined() {
|
||||
klog.Warningf("version not set! Set -ldflags \"-X sigs.k8s.io/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run.")
|
||||
}
|
||||
|
||||
// Get new TopologyGC instance
|
||||
gc, err := nfdtopologygarbagecollector.New(args)
|
||||
if err != nil {
|
||||
klog.Exit(err)
|
||||
}
|
||||
|
||||
if err = gc.Run(); err != nil {
|
||||
klog.Exit(err)
|
||||
}
|
||||
}
|
||||
|
||||
func parseArgs(flags *flag.FlagSet, osArgs ...string) *nfdtopologygarbagecollector.Args {
|
||||
args := initFlags(flags)
|
||||
|
||||
_ = flags.Parse(osArgs)
|
||||
if len(flags.Args()) > 0 {
|
||||
fmt.Fprintf(flags.Output(), "unknown command line argument: %s\n", flags.Args()[0])
|
||||
flags.Usage()
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
return args
|
||||
}
|
||||
|
||||
func initFlags(flagset *flag.FlagSet) *nfdtopologygarbagecollector.Args {
|
||||
args := &nfdtopologygarbagecollector.Args{}
|
||||
|
||||
flagset.DurationVar(&args.GCPeriod, "gc-interval", time.Duration(1)*time.Hour,
|
||||
"Interval between which Garbage Collector will try to cleanup any missed but already obsolete NodeResourceTopology. [Default: 1h]")
|
||||
flagset.StringVar(&args.Kubeconfig, "kubeconfig", "",
|
||||
"Kubeconfig to use")
|
||||
|
||||
klog.InitFlags(flagset)
|
||||
|
||||
return args
|
||||
}
|
41
cmd/nfd-topology-gc/main_test.go
Normal file
41
cmd/nfd-topology-gc/main_test.go
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
Copyright 2023 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
)
|
||||
|
||||
func TestArgsParse(t *testing.T) {
|
||||
Convey("When parsing command line arguments", t, func() {
|
||||
flags := flag.NewFlagSet(ProgramName, flag.ExitOnError)
|
||||
|
||||
Convey("When valid -gc-interval is specified", func() {
|
||||
args := parseArgs(flags,
|
||||
"-gc-interval=30s")
|
||||
|
||||
Convey("args.GCPeriod is set to appropriate values", func() {
|
||||
So(args.GCPeriod, ShouldEqual, 30*time.Second)
|
||||
})
|
||||
})
|
||||
|
||||
})
|
||||
}
|
9
deployment/base/rbac-topology-gc/kustomization.yaml
Normal file
9
deployment/base/rbac-topology-gc/kustomization.yaml
Normal file
|
@ -0,0 +1,9 @@
|
|||
# Kustomization pulling in the RBAC objects required by nfd-topology-gc.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: node-feature-discovery

resources:
- topology-gc-clusterrole.yaml
- topology-gc-clusterrolebinding.yaml
- topology-gc-serviceaccount.yaml
|
|
@ -0,0 +1,25 @@
|
|||
# ClusterRole for nfd-topology-gc: list/watch Nodes to track node
# deletions, and delete/list NodeResourceTopology objects so stale
# NRTs (without a corresponding node) can be cleaned up.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: nfd-topology-gc
rules:
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/proxy
  verbs:
  - get
- apiGroups:
  - topology.node.k8s.io
  resources:
  - noderesourcetopologies
  verbs:
  - delete
  - list
|
@ -0,0 +1,12 @@
|
|||
# Binds the nfd-topology-gc ClusterRole to its ServiceAccount.
# NOTE(review): subject namespace is "default" here while the sibling
# kustomization sets "node-feature-discovery"; presumably kustomize's
# namespace transformer rewrites it — verify the rendered output.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: nfd-topology-gc
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: nfd-topology-gc
subjects:
- kind: ServiceAccount
  name: nfd-topology-gc
  namespace: default
|
|
@ -0,0 +1,4 @@
|
|||
# ServiceAccount the nfd-topology-gc deployment runs under.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfd-topology-gc
|
7
deployment/base/topology-gc/kustomization.yaml
Normal file
7
deployment/base/topology-gc/kustomization.yaml
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Kustomization for the nfd-topology-gc deployment.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: node-feature-discovery

resources:
- topology-gc.yaml
|
23
deployment/base/topology-gc/topology-gc.yaml
Normal file
23
deployment/base/topology-gc/topology-gc.yaml
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Deployment running the nfd-topology-gc daemon, which deletes
# NodeResourceTopology objects whose corresponding Node no longer exists.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: nfd
  name: nfd-topology-gc
spec:
  selector:
    matchLabels:
      app: nfd-topology-gc
  template:
    metadata:
      labels:
        app: nfd-topology-gc
    spec:
      dnsPolicy: ClusterFirstWithHostNet
      serviceAccount: nfd-topology-gc
      containers:
      - name: nfd-topology-gc
        image: gcr.io/k8s-staging-nfd/node-feature-discovery:master
        imagePullPolicy: Always
        command:
        - "nfd-topology-gc"
|
|
@ -94,3 +94,14 @@ Create the name of the service account which topologyUpdater will use
|
|||
{{ default "default" .Values.topologyUpdater.serviceAccount.name }}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account which topologyGC will use
|
||||
*/}}
|
||||
{{- define "node-feature-discovery.topologyGC.serviceAccountName" -}}
|
||||
{{- if .Values.topologyGC.serviceAccount.create -}}
|
||||
{{ default (printf "%s-topology-gc" (include "node-feature-discovery.fullname" .)) .Values.topologyGC.serviceAccount.name }}
|
||||
{{- else -}}
|
||||
{{ default "default" .Values.topologyGC.serviceAccount.name }}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
|
|
@ -66,3 +66,34 @@ rules:
|
|||
- get
|
||||
- update
|
||||
{{- end }}
|
||||
|
||||
---
|
||||
{{- if and .Values.topologyGC.enable .Values.topologyGC.rbac.create .Values.topologyUpdater.enable }}
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
|
||||
labels:
|
||||
{{- include "node-feature-discovery.labels" . | nindent 4 }}
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes/proxy
|
||||
verbs:
|
||||
- get
|
||||
- apiGroups:
|
||||
- topology.node.k8s.io
|
||||
resources:
|
||||
- noderesourcetopologies
|
||||
verbs:
|
||||
- delete
|
||||
- list
|
||||
{{- end }}
|
||||
|
|
|
@ -32,3 +32,21 @@ subjects:
|
|||
name: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
|
||||
namespace: {{ include "node-feature-discovery.namespace" . }}
|
||||
{{- end }}
|
||||
|
||||
---
|
||||
{{- if and .Values.topologyGC.enable .Values.topologyGC.rbac.create .Values.topologyUpdater.enable }}
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
|
||||
labels:
|
||||
{{- include "node-feature-discovery.labels" . | nindent 4 }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: {{ .Values.topologyGC.serviceAccount.name | default "nfd-topology-gc" }}
|
||||
namespace: {{ include "node-feature-discovery.namespace" . }}
|
||||
{{- end }}
|
||||
|
|
|
@ -27,6 +27,21 @@ metadata:
|
|||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
---
|
||||
{{- if and .Values.topologyGC.enable .Values.topologyGC.serviceAccount.create .Values.topologyUpdater.enable }}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ .Values.topologyGC.serviceAccount.name | default "nfd-topology-gc" }}
|
||||
namespace: {{ include "node-feature-discovery.namespace" . }}
|
||||
labels:
|
||||
{{- include "node-feature-discovery.labels" . | nindent 4 }}
|
||||
{{- with .Values.topologyUpdater.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
---
|
||||
{{- if .Values.worker.serviceAccount.create }}
|
||||
apiVersion: v1
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
{{/*
Deployment for the nfd-topology-gc daemon. Rendered only when both
topologyGC and topologyUpdater are enabled, since NRT objects are only
created by the topology updater.
*/}}
{{- if and .Values.topologyGC.enable .Values.topologyUpdater.enable -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "node-feature-discovery.fullname" . }}-topology-gc
  namespace: {{ include "node-feature-discovery.namespace" . }}
  labels:
    {{- include "node-feature-discovery.labels" . | nindent 4 }}
    role: topology-gc
spec:
  replicas: {{ .Values.topologyGC.replicaCount | default 1 }}
  selector:
    matchLabels:
      {{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
      role: topology-gc
  template:
    metadata:
      labels:
        {{- include "node-feature-discovery.selectorLabels" . | nindent 8 }}
        role: topology-gc
      annotations:
        {{- toYaml .Values.topologyGC.annotations | nindent 8 }}
    spec:
      # Fix: read the SA name from .Values.topologyGC.serviceAccount.name
      # (the key the ServiceAccount and ClusterRoleBinding templates use),
      # not the non-existent .Values.topologyGC.serviceAccountName, so a
      # custom SA name keeps the deployment and its RBAC in sync.
      serviceAccountName: {{ .Values.topologyGC.serviceAccount.name | default "nfd-topology-gc" }}
      dnsPolicy: ClusterFirstWithHostNet
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.topologyGC.podSecurityContext | nindent 8 }}
      containers:
      - name: topology-gc
        image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
        imagePullPolicy: "{{ .Values.image.pullPolicy }}"
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        command:
          - "nfd-topology-gc"
        args:
          {{- if .Values.topologyGC.interval | empty | not }}
          - "-gc-interval={{ .Values.topologyGC.interval }}"
          {{- end }}
        resources:
          {{- toYaml .Values.topologyGC.resources | nindent 12 }}
        securityContext:
          {{- toYaml .Values.topologyGC.securityContext | nindent 12 }}

      {{- with .Values.topologyGC.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.topologyGC.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.topologyGC.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end }}
|
|
@ -422,6 +422,44 @@ topologyUpdater:
|
|||
annotations: {}
|
||||
affinity: {}
|
||||
|
||||
topologyGC:
|
||||
enable: true
|
||||
replicaCount: 1
|
||||
|
||||
serviceAccount:
|
||||
create: true
|
||||
annotations: {}
|
||||
name:
|
||||
rbac:
|
||||
create: true
|
||||
|
||||
interval: 1h
|
||||
|
||||
podSecurityContext: {}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop: [ "ALL" ]
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
|
||||
resources: {}
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
nodeSelector: {}
|
||||
tolerations: []
|
||||
annotations: {}
|
||||
affinity: {}
|
||||
|
||||
# Optionally use encryption for worker <--> master comms
|
||||
# TODO: verify hostname is not yet supported
|
||||
#
|
||||
|
|
|
@ -6,11 +6,13 @@ namespace: node-feature-discovery
|
|||
bases:
|
||||
- ../../base/rbac
|
||||
- ../../base/rbac-topologyupdater
|
||||
- ../../base/rbac-topology-gc
|
||||
- ../../base/nfd-crds
|
||||
- ../../base/master
|
||||
- ../../base/worker-daemonset
|
||||
- ../../base/noderesourcetopologies-crd
|
||||
- ../../base/topologyupdater-daemonset
|
||||
- ../../base/topology-gc
|
||||
|
||||
resources:
|
||||
- namespace.yaml
|
||||
|
|
|
@ -5,8 +5,10 @@ namespace: node-feature-discovery
|
|||
|
||||
bases:
|
||||
- ../../base/rbac-topologyupdater
|
||||
- ../../base/rbac-topology-gc
|
||||
- ../../base/noderesourcetopologies-crd
|
||||
- ../../base/topologyupdater-daemonset
|
||||
- ../../base/topology-gc
|
||||
|
||||
resources:
|
||||
- namespace.yaml
|
||||
|
|
194
pkg/nfd-topology-gc/nfd-nrt-gc.go
Normal file
194
pkg/nfd-topology-gc/nfd-nrt-gc.go
Normal file
|
@ -0,0 +1,194 @@
|
|||
/*
|
||||
Copyright 2023 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nfdtopologygarbagecollector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
topologyclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
restclient "k8s.io/client-go/rest"
|
||||
"k8s.io/client-go/tools/cache"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"sigs.k8s.io/node-feature-discovery/pkg/apihelper"
|
||||
)
|
||||
|
||||
// Args are the command line arguments
|
||||
type Args struct {
|
||||
GCPeriod time.Duration
|
||||
|
||||
Kubeconfig string
|
||||
}
|
||||
|
||||
type TopologyGC interface {
|
||||
Run() error
|
||||
Stop()
|
||||
}
|
||||
|
||||
type topologyGC struct {
|
||||
stopChan chan struct{}
|
||||
topoClient topologyclientset.Interface
|
||||
gcPeriod time.Duration
|
||||
factory informers.SharedInformerFactory
|
||||
}
|
||||
|
||||
func New(args *Args) (TopologyGC, error) {
|
||||
kubeconfig, err := apihelper.GetKubeconfig(args.Kubeconfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stop := make(chan struct{})
|
||||
|
||||
return newTopologyGC(kubeconfig, stop, args.GCPeriod)
|
||||
}
|
||||
|
||||
func newTopologyGC(config *restclient.Config, stop chan struct{}, gcPeriod time.Duration) (*topologyGC, error) {
|
||||
helper := apihelper.K8sHelpers{Kubeconfig: config}
|
||||
cli, err := helper.GetTopologyClient()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
clientset := kubernetes.NewForConfigOrDie(config)
|
||||
factory := informers.NewSharedInformerFactory(clientset, 5*time.Minute)
|
||||
|
||||
return &topologyGC{
|
||||
topoClient: cli,
|
||||
stopChan: stop,
|
||||
gcPeriod: gcPeriod,
|
||||
factory: factory,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (n *topologyGC) deleteNRT(nodeName string) {
|
||||
if err := n.topoClient.TopologyV1alpha1().NodeResourceTopologies().Delete(context.TODO(), nodeName, metav1.DeleteOptions{}); err != nil {
|
||||
if errors.IsNotFound(err) {
|
||||
klog.V(2).Infof("NodeResourceTopology for node %s not found, omitting deletion", nodeName)
|
||||
return
|
||||
} else {
|
||||
klog.Warningf("failed to delete NodeResourceTopology for node %s: %s", nodeName, err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
klog.Infof("NodeResourceTopology for node %s has been deleted", nodeName)
|
||||
}
|
||||
|
||||
func (n *topologyGC) deleteNodeHandler(object interface{}) {
|
||||
// handle a case when we are starting up and need to clear stale NRT resources
|
||||
obj := object
|
||||
if deletedFinalStateUnknown, ok := object.(cache.DeletedFinalStateUnknown); ok {
|
||||
klog.V(2).Infof("found stale NodeResourceTopology for node: %s ", object)
|
||||
obj = deletedFinalStateUnknown.Obj
|
||||
}
|
||||
|
||||
node, ok := obj.(*corev1.Node)
|
||||
if !ok {
|
||||
klog.Errorf("cannot convert %v to v1.Node", object)
|
||||
return
|
||||
}
|
||||
|
||||
n.deleteNRT(node.GetName())
|
||||
}
|
||||
|
||||
func (n *topologyGC) runGC() {
|
||||
klog.Infof("Running GC")
|
||||
objects := n.factory.Core().V1().Nodes().Informer().GetIndexer().List()
|
||||
nodes := sets.NewString()
|
||||
for _, object := range objects {
|
||||
key, err := cache.MetaNamespaceKeyFunc(object)
|
||||
if err != nil {
|
||||
klog.Warningf("cannot create key for %v: %s", object, err.Error())
|
||||
continue
|
||||
}
|
||||
nodes.Insert(key)
|
||||
}
|
||||
|
||||
nrts, err := n.topoClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
|
||||
if err != nil {
|
||||
klog.Warningf("cannot list NRTs %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
for _, nrt := range nrts.Items {
|
||||
key, err := cache.MetaNamespaceKeyFunc(&nrt)
|
||||
if err != nil {
|
||||
klog.Warningf("cannot create key for %v: %s", nrt, err.Error())
|
||||
continue
|
||||
}
|
||||
if !nodes.Has(key) {
|
||||
n.deleteNRT(key)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// periodicGC runs garbage collector at every gcPeriod to make sure we haven't missed any node
|
||||
func (n *topologyGC) periodicGC(gcPeriod time.Duration) {
|
||||
gcTrigger := time.NewTicker(gcPeriod)
|
||||
for {
|
||||
select {
|
||||
case <-gcTrigger.C:
|
||||
n.runGC()
|
||||
case <-n.stopChan:
|
||||
klog.Infof("shutting down periodic Garbage Collector")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (n *topologyGC) run() error {
|
||||
nodeInformer := n.factory.Core().V1().Nodes().Informer()
|
||||
|
||||
if _, err := nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||
DeleteFunc: n.deleteNodeHandler,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// start informers
|
||||
n.factory.Start(n.stopChan)
|
||||
n.factory.WaitForCacheSync(n.stopChan)
|
||||
|
||||
n.runGC()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Run is a blocking function that removes stale NRT objects when Node is deleted and runs periodic GC to make sure any obsolete objects are removed
|
||||
func (n *topologyGC) Run() error {
|
||||
if err := n.run(); err != nil {
|
||||
return err
|
||||
}
|
||||
// run periodic GC
|
||||
n.periodicGC(n.gcPeriod)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *topologyGC) Stop() {
|
||||
select {
|
||||
case n.stopChan <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
234
pkg/nfd-topology-gc/nfd-nrt-gc_test.go
Normal file
234
pkg/nfd-topology-gc/nfd-nrt-gc_test.go
Normal file
|
@ -0,0 +1,234 @@
|
|||
/*
|
||||
Copyright 2023 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nfdtopologygarbagecollector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
nrtapi "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
|
||||
v1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
|
||||
faketopologyv1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned/fake"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/informers"
|
||||
fakek8sclientset "k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
. "github.com/smartystreets/goconvey/convey"
|
||||
)
|
||||
|
||||
func TestNRTGC(t *testing.T) {
|
||||
Convey("When theres is old NRT ", t, func() {
|
||||
k8sClient := fakek8sclientset.NewSimpleClientset()
|
||||
|
||||
fakeClient := faketopologyv1alpha1.NewSimpleClientset(&nrtapi.NodeResourceTopology{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node1",
|
||||
},
|
||||
})
|
||||
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
|
||||
|
||||
stopChan := make(chan struct{}, 1)
|
||||
|
||||
gc := &topologyGC{
|
||||
factory: factory,
|
||||
topoClient: fakeClient,
|
||||
stopChan: stopChan,
|
||||
gcPeriod: 10 * time.Minute,
|
||||
}
|
||||
|
||||
err := gc.run()
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
|
||||
So(err, ShouldBeNil)
|
||||
So(nrts.Items, ShouldHaveLength, 0)
|
||||
|
||||
gc.Stop()
|
||||
})
|
||||
Convey("When theres is one old NRT and one up to date", t, func() {
|
||||
k8sClient := fakek8sclientset.NewSimpleClientset(&corev1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node1",
|
||||
},
|
||||
})
|
||||
|
||||
fakeClient := faketopologyv1alpha1.NewSimpleClientset(&nrtapi.NodeResourceTopology{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node1",
|
||||
},
|
||||
},
|
||||
&nrtapi.NodeResourceTopology{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node2",
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
stopChan := make(chan struct{}, 1)
|
||||
|
||||
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
|
||||
|
||||
gc := &topologyGC{
|
||||
factory: factory,
|
||||
topoClient: fakeClient,
|
||||
stopChan: stopChan,
|
||||
gcPeriod: 10 * time.Minute,
|
||||
}
|
||||
|
||||
err := gc.run()
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
|
||||
So(err, ShouldBeNil)
|
||||
So(nrts.Items, ShouldHaveLength, 1)
|
||||
So(nrts.Items[0].GetName(), ShouldEqual, "node1")
|
||||
|
||||
})
|
||||
Convey("Should react to delete event", t, func() {
|
||||
k8sClient := fakek8sclientset.NewSimpleClientset(
|
||||
&corev1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node1",
|
||||
},
|
||||
},
|
||||
&corev1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node2",
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
fakeClient := faketopologyv1alpha1.NewSimpleClientset(
|
||||
&nrtapi.NodeResourceTopology{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node1",
|
||||
},
|
||||
},
|
||||
&nrtapi.NodeResourceTopology{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node2",
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
stopChan := make(chan struct{}, 1)
|
||||
|
||||
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
|
||||
gc := &topologyGC{
|
||||
factory: factory,
|
||||
topoClient: fakeClient,
|
||||
stopChan: stopChan,
|
||||
gcPeriod: 10 * time.Minute,
|
||||
}
|
||||
|
||||
err := gc.run()
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
So(nrts.Items, ShouldHaveLength, 2)
|
||||
|
||||
err = k8sClient.CoreV1().Nodes().Delete(context.TODO(), "node1", metav1.DeleteOptions{})
|
||||
So(err, ShouldBeNil)
|
||||
// simple sleep with retry loop to make sure indexer will pick up event and trigger deleteNode Function
|
||||
deleted := false
|
||||
for i := 0; i < 5; i++ {
|
||||
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
if len(nrts.Items) == 1 {
|
||||
deleted = true
|
||||
break
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
So(deleted, ShouldBeTrue)
|
||||
})
|
||||
Convey("periodic GC should remove obsolete NRT", t, func() {
|
||||
k8sClient := fakek8sclientset.NewSimpleClientset(
|
||||
&corev1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node1",
|
||||
},
|
||||
},
|
||||
&corev1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node2",
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
fakeClient := faketopologyv1alpha1.NewSimpleClientset(
|
||||
&nrtapi.NodeResourceTopology{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node1",
|
||||
},
|
||||
},
|
||||
&nrtapi.NodeResourceTopology{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "node2",
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
stopChan := make(chan struct{}, 1)
|
||||
|
||||
factory := informers.NewSharedInformerFactory(k8sClient, 5*time.Minute)
|
||||
gc := &topologyGC{
|
||||
factory: factory,
|
||||
topoClient: fakeClient,
|
||||
stopChan: stopChan,
|
||||
gcPeriod: time.Second,
|
||||
}
|
||||
|
||||
err := gc.run()
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
So(nrts.Items, ShouldHaveLength, 2)
|
||||
|
||||
nrt := v1alpha1.NodeResourceTopology{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "not-existing",
|
||||
},
|
||||
}
|
||||
|
||||
go gc.periodicGC(time.Second)
|
||||
|
||||
_, err = fakeClient.TopologyV1alpha1().NodeResourceTopologies().Create(context.TODO(), &nrt, metav1.CreateOptions{})
|
||||
So(err, ShouldBeNil)
|
||||
// simple sleep with retry loop to make sure GC was triggered
|
||||
deleted := false
|
||||
for i := 0; i < 5; i++ {
|
||||
nrts, err := fakeClient.TopologyV1alpha1().NodeResourceTopologies().List(context.TODO(), metav1.ListOptions{})
|
||||
So(err, ShouldBeNil)
|
||||
|
||||
if len(nrts.Items) == 2 {
|
||||
deleted = true
|
||||
break
|
||||
}
|
||||
time.Sleep(2 * time.Second)
|
||||
}
|
||||
So(deleted, ShouldBeTrue)
|
||||
})
|
||||
|
||||
}
|
Loading…
Reference in a new issue