From 053d9176fcb3063b764a77921e2119de8d260846 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Thu, 20 Oct 2022 09:44:31 +0200 Subject: [PATCH] topology-updater: continue looping on scan error Scanning podresources can temporarily fail; the previous code was mistakenly not rearming the loop condition when this occurred, effectively stopping the monitoring. Rather, we should always poll and bail out on unrecoverable error or when asked to stop. Signed-off-by: Francesco Romani --- .../topology-updater/nfd-topology-updater.go | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pkg/nfd-client/topology-updater/nfd-topology-updater.go b/pkg/nfd-client/topology-updater/nfd-topology-updater.go index 2e43e5117..baa8f378e 100644 --- a/pkg/nfd-client/topology-updater/nfd-topology-updater.go +++ b/pkg/nfd-client/topology-updater/nfd-topology-updater.go @@ -126,15 +126,15 @@ func (w *nfdTopologyUpdater) Run() error { return err } - crTrigger := time.After(0) + crTrigger := time.NewTicker(w.resourcemonitorArgs.SleepInterval) for { select { - case <-crTrigger: - klog.Infof("Scanning\n") + case <-crTrigger.C: + klog.Infof("Scanning") podResources, err := resScan.Scan() utils.KlogDump(1, "podResources are", " ", podResources) if err != nil { - klog.Warningf("Scan failed: %v\n", err) + klog.Warningf("Scan failed: %v", err) continue } zones = resAggr.Aggregate(podResources) @@ -147,10 +147,6 @@ func (w *nfdTopologyUpdater) Run() error { return nil } - if w.resourcemonitorArgs.SleepInterval > 0 { - crTrigger = time.After(w.resourcemonitorArgs.SleepInterval) - } - case <-w.certWatch.Events: klog.Infof("TLS certificate update, renewing connection to nfd-master") w.Disconnect()