From 6d5e988ebee5b7442a7b37ea3e9b97944b13f45a Mon Sep 17 00:00:00 2001 From: shuting <shutting06@gmail.com> Date: Tue, 15 Jun 2021 18:39:22 -0700 Subject: [PATCH] BugFix - update the annotation lastRequestTimestamp from active instances (#2019) * fix webhook monitor - inactive instance did not get latest request timestamp Signed-off-by: Shuting Zhao <shutting06@gmail.com> * add checks for registered webhook configs Signed-off-by: Shuting Zhao <shutting06@gmail.com> * update veridy_deployment.sh Signed-off-by: Shuting Zhao <shutting06@gmail.com> * add debug steps in e2d workflow Signed-off-by: Shuting Zhao <shutting06@gmail.com> * fix CI errors Signed-off-by: Shuting Zhao <shutting06@gmail.com> --- .github/workflows/e2e.yaml | 8 +++++ pkg/webhookconfig/monitor.go | 60 ++++++++++++++++-------------------- scripts/verify-deployment.sh | 10 +++++- 3 files changed, 44 insertions(+), 34 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index ac4e5fdb04..0887a45862 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -100,3 +100,11 @@ jobs: run: | kubectl create namespace kyverno ct install --target-branch=main --namespace=kyverno + + - name: Debug failure + if: failure() + run: | + kubectl get mutatingwebhookconfigurations,validatingwebhookconfigurations + kubectl -n kyverno get pod + kubectl -n kyverno describe pod | grep -i events -A10 + kubectl -n kyverno logs deploy/kyverno \ No newline at end of file diff --git a/pkg/webhookconfig/monitor.go b/pkg/webhookconfig/monitor.go index 92bce6789f..84139a016d 100644 --- a/pkg/webhookconfig/monitor.go +++ b/pkg/webhookconfig/monitor.go @@ -76,7 +76,7 @@ func (t *Monitor) SetTime(tm time.Time) { func (t *Monitor) Run(register *Register, certRenewer *tls.CertRenewer, eventGen event.Interface, stopCh <-chan struct{}) { logger := t.log - logger.V(4).Info("starting webhook monitor", "interval", idleCheckInterval) + logger.V(4).Info("starting webhook monitor", "interval", idleCheckInterval.String()) status := newStatusControl(register, eventGen, t.log.WithName("WebhookStatusControl")) ticker := time.NewTicker(tickerInterval) @@ -92,7 +92,18 @@ func (t *Monitor) Run(register *Register, certRenewer *tls.CertRenewer, eventGen } timeDiff := time.Since(t.Time()) - if timeDiff > idleDeadline { + lastRequestTimeFromAnn := lastRequestTimeFromAnnotation(register, t.log.WithName("lastRequestTimeFromAnnotation")) + if lastRequestTimeFromAnn == nil { + if err := status.UpdateLastRequestTimestmap(t.Time()); err != nil { + logger.Error(err, "failed to annotate deployment for lastRequestTime") + } else { + logger.Info("initialized lastRequestTimestamp", "time", t.Time()) + } + continue + } + + switch { + case timeDiff > idleDeadline: err := fmt.Errorf("admission control configuration error") logger.Error(err, "webhook check failed", "deadline", idleDeadline.String()) if err := status.failure(); err != nil { @@ -101,52 +112,36 @@ func (t *Monitor) Run(register *Register, certRenewer *tls.CertRenewer, eventGen if err := register.Register(); err != nil { logger.Error(err, "Failed to register webhooks") - } else { - // if the status was false before then we update it to true - // send request to update the Kyverno deployment - if err := status.success(); err != nil { - logger.Error(err, "failed to annotate deployment webhook status to success") - } } - continue - } - - if timeDiff > idleCheckInterval { + case timeDiff > 2*idleCheckInterval: if skipWebhookCheck(register, logger.WithName("skipWebhookCheck")) { logger.Info("skip validating webhook status, Kyverno is in rolling update") continue } - lastRequestTimeFromAnn := lastRequestTimeFromAnnotation(register, t.log.WithName("lastRequestTimeFromAnnotation")) - if lastRequestTimeFromAnn == nil { - now := time.Now() - lastRequestTimeFromAnn = &now - if err := status.UpdateLastRequestTimestmap(t.Time()); err != nil { - logger.Error(err, "failed to annotate deployment for lastRequestTime") - } else { - logger.Info("initialized lastRequestTimestamp", "time", lastRequestTimeFromAnn) - } - continue - } - if t.Time().Before(*lastRequestTimeFromAnn) { t.SetTime(*lastRequestTimeFromAnn) logger.V(3).Info("updated in-memory timestamp", "time", lastRequestTimeFromAnn) - continue } + } - idleT := time.Since(*lastRequestTimeFromAnn) - if idleT > idleCheckInterval*2 { - logger.V(3).Info("webhook idle time exceeded", "lastRequestTimeFromAnn", (*lastRequestTimeFromAnn).String(), "deadline", (idleCheckInterval * 2).String()) + idleT := time.Since(*lastRequestTimeFromAnn) + if idleT > idleCheckInterval { + if t.Time().After(*lastRequestTimeFromAnn) { + logger.V(3).Info("updating annotation lastRequestTimestamp with the latest in-memory timestamp", "time", t.Time()) if err := status.UpdateLastRequestTimestmap(t.Time()); err != nil { logger.Error(err, "failed to update lastRequestTimestamp annotation") - } else { - logger.V(3).Info("updated annotation lastRequestTimestamp", "time", t.Time()) } } } + // if the status was false before then we update it to true + // send request to update the Kyverno deployment + if err := status.success(); err != nil { + logger.Error(err, "failed to annotate deployment webhook status to success") + } + case <-stopCh: // handler termination signal logger.V(2).Info("stopping webhook monitor") @@ -179,7 +174,7 @@ func lastRequestTimeFromAnnotation(register *Register, logger logr.Logger) *time return nil } - annotation, ok, err := unstructured.NestedStringMap(deploy.UnstructuredContent(), "metadata", "annotations") + timeStamp, ok, err := unstructured.NestedString(deploy.UnstructuredContent(), "metadata", "annotations", annLastRequestTime) if err != nil { logger.Info("unable to get annotation", "reason", err.Error()) return nil @@ -190,10 +185,9 @@ func lastRequestTimeFromAnnotation(register *Register, logger logr.Logger) *time return nil } - timeStamp := annotation[annLastRequestTime] annTime, err := time.Parse(time.RFC3339, timeStamp) if err != nil { - logger.Error(err, "failed to parse timestamp annotation") + logger.Error(err, "failed to parse timestamp annotation", "timeStamp", timeStamp) return nil } diff --git a/scripts/verify-deployment.sh b/scripts/verify-deployment.sh index cf7720071a..f6dbfcbe65 100644 --- a/scripts/verify-deployment.sh +++ b/scripts/verify-deployment.sh @@ -121,4 +121,12 @@ while [[ ${updated_replicas} -lt ${specified_replicas} || ${current_replicas} -g available_replicas=$(get_available_replicas) done -echo "Deployment ${deployment} successful. All ${available_replicas} replicas are ready." \ No newline at end of file +echo "Deployment ${deployment} successful. All ${available_replicas} replicas are ready." + +mutatingwebhookconfigurations=$(kubectl get mutatingwebhookconfigurations | wc -l) +validatingwebhookconfigurations=$(kubectl get validatingwebhookconfigurations | wc -l) +while [[ ${mutatingwebhookconfigurations} -lt 4 || ${validatingwebhookconfigurations} -lt 3 ]]; do + sleep 5 +done + +echo "All webhooks are registered." \ No newline at end of file