1
0
Fork 0
mirror of https://github.com/kubernetes-sigs/node-feature-discovery.git synced 2025-03-16 21:38:23 +00:00

topology-updater: continue looping on scan error

Scanning podresources can temporarily fail; the previous code was
mistakenly not rearming the loop condition when this occurred,
effectively stopping the monitoring.

Rather, we should always poll, and bail out only on an unrecoverable
error or when asked to stop.

Signed-off-by: Francesco Romani <fromani@redhat.com>
This commit is contained in:
Francesco Romani 2022-10-20 09:44:31 +02:00 committed by k8s-infra-cherrypick-robot
parent 5741fc92e3
commit 053d9176fc

View file

@ -126,15 +126,15 @@ func (w *nfdTopologyUpdater) Run() error {
return err return err
} }
crTrigger := time.After(0) crTrigger := time.NewTicker(w.resourcemonitorArgs.SleepInterval)
for { for {
select { select {
case <-crTrigger: case <-crTrigger.C:
klog.Infof("Scanning\n") klog.Infof("Scanning")
podResources, err := resScan.Scan() podResources, err := resScan.Scan()
utils.KlogDump(1, "podResources are", " ", podResources) utils.KlogDump(1, "podResources are", " ", podResources)
if err != nil { if err != nil {
klog.Warningf("Scan failed: %v\n", err) klog.Warningf("Scan failed: %v", err)
continue continue
} }
zones = resAggr.Aggregate(podResources) zones = resAggr.Aggregate(podResources)
@ -147,10 +147,6 @@ func (w *nfdTopologyUpdater) Run() error {
return nil return nil
} }
if w.resourcemonitorArgs.SleepInterval > 0 {
crTrigger = time.After(w.resourcemonitorArgs.SleepInterval)
}
case <-w.certWatch.Events: case <-w.certWatch.Events:
klog.Infof("TLS certificate update, renewing connection to nfd-master") klog.Infof("TLS certificate update, renewing connection to nfd-master")
w.Disconnect() w.Disconnect()