From 6d5e988ebee5b7442a7b37ea3e9b97944b13f45a Mon Sep 17 00:00:00 2001
From: shuting <shutting06@gmail.com>
Date: Tue, 15 Jun 2021 18:39:22 -0700
Subject: [PATCH] BugFix - update the annotation lastRequestTimestamp from
 active instances (#2019)

* fix webhook monitor - inactive instance did not get latest request timestamp

Signed-off-by: Shuting Zhao <shutting06@gmail.com>

* add checks for registered webhook configs

Signed-off-by: Shuting Zhao <shutting06@gmail.com>

* update veridy_deployment.sh

Signed-off-by: Shuting Zhao <shutting06@gmail.com>

* add debug steps in e2d workflow

Signed-off-by: Shuting Zhao <shutting06@gmail.com>

* fix CI errors

Signed-off-by: Shuting Zhao <shutting06@gmail.com>
---
 .github/workflows/e2e.yaml   |  8 +++++
 pkg/webhookconfig/monitor.go | 60 ++++++++++++++++--------------------
 scripts/verify-deployment.sh | 10 +++++-
 3 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index ac4e5fdb04..0887a45862 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -100,3 +100,11 @@ jobs:
         run: |
           kubectl create namespace kyverno
           ct install --target-branch=main --namespace=kyverno
+
+      - name: Debug failure
+        if: failure()
+        run: |
+          kubectl get mutatingwebhookconfigurations,validatingwebhookconfigurations
+          kubectl -n kyverno get pod
+          kubectl -n kyverno describe pod | grep -i events -A10
+          kubectl -n kyverno logs deploy/kyverno
\ No newline at end of file
diff --git a/pkg/webhookconfig/monitor.go b/pkg/webhookconfig/monitor.go
index 92bce6789f..84139a016d 100644
--- a/pkg/webhookconfig/monitor.go
+++ b/pkg/webhookconfig/monitor.go
@@ -76,7 +76,7 @@ func (t *Monitor) SetTime(tm time.Time) {
 func (t *Monitor) Run(register *Register, certRenewer *tls.CertRenewer, eventGen event.Interface, stopCh <-chan struct{}) {
 	logger := t.log
 
-	logger.V(4).Info("starting webhook monitor", "interval", idleCheckInterval)
+	logger.V(4).Info("starting webhook monitor", "interval", idleCheckInterval.String())
 	status := newStatusControl(register, eventGen, t.log.WithName("WebhookStatusControl"))
 
 	ticker := time.NewTicker(tickerInterval)
@@ -92,7 +92,18 @@ func (t *Monitor) Run(register *Register, certRenewer *tls.CertRenewer, eventGen
 			}
 
 			timeDiff := time.Since(t.Time())
-			if timeDiff > idleDeadline {
+			lastRequestTimeFromAnn := lastRequestTimeFromAnnotation(register, t.log.WithName("lastRequestTimeFromAnnotation"))
+			if lastRequestTimeFromAnn == nil {
+				if err := status.UpdateLastRequestTimestmap(t.Time()); err != nil {
+					logger.Error(err, "failed to annotate deployment for lastRequestTime")
+				} else {
+					logger.Info("initialized lastRequestTimestamp", "time", t.Time())
+				}
+				continue
+			}
+
+			switch {
+			case timeDiff > idleDeadline:
 				err := fmt.Errorf("admission control configuration error")
 				logger.Error(err, "webhook check failed", "deadline", idleDeadline.String())
 				if err := status.failure(); err != nil {
@@ -101,52 +112,36 @@ func (t *Monitor) Run(register *Register, certRenewer *tls.CertRenewer, eventGen
 
 				if err := register.Register(); err != nil {
 					logger.Error(err, "Failed to register webhooks")
-				} else {
-					// if the status was false before then we update it to true
-					// send request to update the Kyverno deployment
-					if err := status.success(); err != nil {
-						logger.Error(err, "failed to annotate deployment webhook status to success")
-					}
 				}
 
-				continue
-			}
-
-			if timeDiff > idleCheckInterval {
+			case timeDiff > 2*idleCheckInterval:
 				if skipWebhookCheck(register, logger.WithName("skipWebhookCheck")) {
 					logger.Info("skip validating webhook status, Kyverno is in rolling update")
 					continue
 				}
 
-				lastRequestTimeFromAnn := lastRequestTimeFromAnnotation(register, t.log.WithName("lastRequestTimeFromAnnotation"))
-				if lastRequestTimeFromAnn == nil {
-					now := time.Now()
-					lastRequestTimeFromAnn = &now
-					if err := status.UpdateLastRequestTimestmap(t.Time()); err != nil {
-						logger.Error(err, "failed to annotate deployment for lastRequestTime")
-					} else {
-						logger.Info("initialized lastRequestTimestamp", "time", lastRequestTimeFromAnn)
-					}
-					continue
-				}
-
 				if t.Time().Before(*lastRequestTimeFromAnn) {
 					t.SetTime(*lastRequestTimeFromAnn)
 					logger.V(3).Info("updated in-memory timestamp", "time", lastRequestTimeFromAnn)
-					continue
 				}
+			}
 
-				idleT := time.Since(*lastRequestTimeFromAnn)
-				if idleT > idleCheckInterval*2 {
-					logger.V(3).Info("webhook idle time exceeded", "lastRequestTimeFromAnn", (*lastRequestTimeFromAnn).String(), "deadline", (idleCheckInterval * 2).String())
+			idleT := time.Since(*lastRequestTimeFromAnn)
+			if idleT > idleCheckInterval {
+				if t.Time().After(*lastRequestTimeFromAnn) {
+					logger.V(3).Info("updating annotation lastRequestTimestamp with the latest in-memory timestamp", "time", t.Time())
 					if err := status.UpdateLastRequestTimestmap(t.Time()); err != nil {
 						logger.Error(err, "failed to update lastRequestTimestamp annotation")
-					} else {
-						logger.V(3).Info("updated annotation lastRequestTimestamp", "time", t.Time())
 					}
 				}
 			}
 
+			// if the status was false before then we update it to true
+			// send request to update the Kyverno deployment
+			if err := status.success(); err != nil {
+				logger.Error(err, "failed to annotate deployment webhook status to success")
+			}
+
 		case <-stopCh:
 			// handler termination signal
 			logger.V(2).Info("stopping webhook monitor")
@@ -179,7 +174,7 @@ func lastRequestTimeFromAnnotation(register *Register, logger logr.Logger) *time
 		return nil
 	}
 
-	annotation, ok, err := unstructured.NestedStringMap(deploy.UnstructuredContent(), "metadata", "annotations")
+	timeStamp, ok, err := unstructured.NestedString(deploy.UnstructuredContent(), "metadata", "annotations", annLastRequestTime)
 	if err != nil {
 		logger.Info("unable to get annotation", "reason", err.Error())
 		return nil
@@ -190,10 +185,9 @@ func lastRequestTimeFromAnnotation(register *Register, logger logr.Logger) *time
 		return nil
 	}
 
-	timeStamp := annotation[annLastRequestTime]
 	annTime, err := time.Parse(time.RFC3339, timeStamp)
 	if err != nil {
-		logger.Error(err, "failed to parse timestamp annotation")
+		logger.Error(err, "failed to parse timestamp annotation", "timeStamp", timeStamp)
 		return nil
 	}
 
diff --git a/scripts/verify-deployment.sh b/scripts/verify-deployment.sh
index cf7720071a..f6dbfcbe65 100644
--- a/scripts/verify-deployment.sh
+++ b/scripts/verify-deployment.sh
@@ -121,4 +121,12 @@ while [[ ${updated_replicas} -lt ${specified_replicas} || ${current_replicas} -g
   available_replicas=$(get_available_replicas)
 done
 
-echo "Deployment ${deployment} successful. All ${available_replicas} replicas are ready."
\ No newline at end of file
+echo "Deployment ${deployment} successful. All ${available_replicas} replicas are ready."
+
+mutatingwebhookconfigurations=$(kubectl get mutatingwebhookconfigurations | wc -l)
+validatingwebhookconfigurations=$(kubectl get validatingwebhookconfigurations | wc -l)
+while [[ ${mutatingwebhookconfigurations} -lt 4 || ${validatingwebhookconfigurations} -lt 3 ]]; do
+  sleep 5
+done
+
+echo "All webhooks are registered."
\ No newline at end of file