topology-updater: continue looping on scan error

Scanning podresources can temporarily fail; the previous code mistakenly did not rearm the loop condition when this occurred, effectively stopping the monitoring. Instead, we should always keep polling, and bail out only on an unrecoverable error or when asked to stop.

Signed-off-by: Francesco Romani <fromani@redhat.com>
2025-04-09 02:24:00 +00:00 · 2022-10-20 09:44:31 +02:00 · 2022-10-20 09:44:31 +02:00 · 053d9176fc
commit 053d9176fc
parent 5741fc92e3
1 changed files with 4 additions and 8 deletions
--- a/pkg/nfd-client/topology-updater/nfd-topology-updater.go
+++ b/pkg/nfd-client/topology-updater/nfd-topology-updater.go
@@ -126,15 +126,15 @@ func (w *nfdTopologyUpdater) Run() error {
 		return err
 	}

-	crTrigger := time.After(0)
+	crTrigger := time.NewTicker(w.resourcemonitorArgs.SleepInterval)
 	for {
 		select {
-		case <-crTrigger:
-			klog.Infof("Scanning\n")
+		case <-crTrigger.C:
+			klog.Infof("Scanning")
 			podResources, err := resScan.Scan()
 			utils.KlogDump(1, "podResources are", "  ", podResources)
 			if err != nil {
-				klog.Warningf("Scan failed: %v\n", err)
+				klog.Warningf("Scan failed: %v", err)
 				continue
 			}
 			zones = resAggr.Aggregate(podResources)
@@ -147,10 +147,6 @@ func (w *nfdTopologyUpdater) Run() error {
 				return nil
 			}

-			if w.resourcemonitorArgs.SleepInterval > 0 {
-				crTrigger = time.After(w.resourcemonitorArgs.SleepInterval)
-			}
-
 		case <-w.certWatch.Events:
 			klog.Infof("TLS certificate update, renewing connection to nfd-master")
 			w.Disconnect()