From 83b7f919aa3eac39441578f25dd466f4ca355440 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Charles-Edouard=20Br=C3=A9t=C3=A9ch=C3=A9?=
 <charled.breteche@gmail.com>
Date: Fri, 7 Oct 2022 16:21:37 +0200
Subject: [PATCH] refactor: make cert manager a real controller (#4792)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Charles-Edouard Brétéché <charles.edouard@nirmata.com>

Co-authored-by: Prateek Pandey <prateek.pandey@nirmata.com>
---
 cmd/kyverno/main.go                       |  13 +--
 pkg/controllers/certmanager/controller.go | 112 +++++++++++++---------
 2 files changed, 70 insertions(+), 55 deletions(-)

diff --git a/cmd/kyverno/main.go b/cmd/kyverno/main.go
index 5e04e6669b..014a5dac46 100644
--- a/cmd/kyverno/main.go
+++ b/cmd/kyverno/main.go
@@ -639,11 +639,6 @@ func main() {
 			logger := logger.WithName("leader")
 			// when losing the lead we just terminate the pod
 			defer signalCancel()
-			// init tls secret
-			if err := certRenewer.InitTLSPemPair(); err != nil {
-				logger.Error(err, "tls initialization error")
-				os.Exit(1)
-			}
 			// validate config
 			if err := webhookCfg.ValidateWebhookConfigurations(config.KyvernoNamespace(), config.KyvernoConfigMapName()); err != nil {
 				logger.Error(err, "invalid format of the Kyverno init ConfigMap, please correct the format of 'data.webhooks'")
@@ -682,6 +677,10 @@ func main() {
 				// TODO: shall we just exit ?
 				logger.Error(errors.New("failed to wait for cache sync"), "failed to wait for cache sync")
 			}
+			// start leader controllers
+			for _, controller := range leaderControllers {
+				go controller.run(signalCtx, logger.WithName("controllers"))
+			}
 			// bootstrap
 			if autoUpdateWebhooks {
 				go webhookCfg.UpdateWebhookConfigurations(configuration)
@@ -692,10 +691,6 @@ func main() {
 				os.Exit(1)
 			}
 			webhookCfg.UpdateWebhookChan <- true
-			// start leader controllers
-			for _, controller := range leaderControllers {
-				go controller.run(signalCtx, logger.WithName("controllers"))
-			}
 			// wait until we loose the lead (or signal context is canceled)
 			<-ctx.Done()
 		},
diff --git a/pkg/controllers/certmanager/controller.go b/pkg/controllers/certmanager/controller.go
index c65d8f760a..351ca8fd33 100644
--- a/pkg/controllers/certmanager/controller.go
+++ b/pkg/controllers/certmanager/controller.go
@@ -2,91 +2,111 @@ package certmanager
 
 import (
 	"context"
-	"os"
-	"reflect"
 	"time"
 
+	"github.com/go-logr/logr"
 	"github.com/kyverno/kyverno/pkg/common"
 	"github.com/kyverno/kyverno/pkg/config"
 	"github.com/kyverno/kyverno/pkg/controllers"
 	"github.com/kyverno/kyverno/pkg/tls"
+	controllerutils "github.com/kyverno/kyverno/pkg/utils/controller"
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
 	corev1informers "k8s.io/client-go/informers/core/v1"
 	corev1listers "k8s.io/client-go/listers/core/v1"
-	"k8s.io/client-go/tools/cache"
+	"k8s.io/client-go/util/workqueue"
 )
 
 const (
 	// Workers is the number of workers for this controller
 	Workers        = 1
 	ControllerName = "certmanager-controller"
+	maxRetries     = 10
 )
 
 type controller struct {
-	renewer      *tls.CertRenewer
+	renewer *tls.CertRenewer
+
+	// listers
 	secretLister corev1listers.SecretLister
-	secretQueue  chan bool
+
+	// queue
+	queue         workqueue.RateLimitingInterface
+	secretEnqueue controllerutils.EnqueueFunc
 }
 
 func NewController(secretInformer corev1informers.SecretInformer, certRenewer *tls.CertRenewer) controllers.Controller {
-	manager := &controller{
-		renewer:      certRenewer,
-		secretLister: secretInformer.Lister(),
-		secretQueue:  make(chan bool, 1),
+	queue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), ControllerName)
+	c := controller{
+		renewer:       certRenewer,
+		secretLister:  secretInformer.Lister(),
+		queue:         queue,
+		secretEnqueue: controllerutils.AddDefaultEventHandlers(logger.V(3), secretInformer.Informer(), queue),
 	}
-	secretInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc:    manager.addSecretFunc,
-		UpdateFunc: manager.updateSecretFunc,
-	})
-	return manager
+	return &c
 }
 
-func (m *controller) Run(ctx context.Context, workers int) {
-	logger.Info("start managing certificate")
+func (c *controller) Run(ctx context.Context, workers int) {
+	go c.ticker(ctx)
+	// we need to enqueue our secrets in case they don't exist yet in the cluster
+	// this way we ensure the reconcile happens (hence renewal/creation)
+	if err := c.secretEnqueue(&corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: config.KyvernoNamespace(),
+			Name:      tls.GenerateTLSPairSecretName(),
+		},
+	}); err != nil {
+		logger.Error(err, "failed to enqueue secret", "name", tls.GenerateTLSPairSecretName())
+	}
+	if err := c.secretEnqueue(&corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: config.KyvernoNamespace(),
+			Name:      tls.GenerateRootCASecretName(),
+		},
+	}); err != nil {
+		logger.Error(err, "failed to enqueue CA secret", "name", tls.GenerateRootCASecretName())
+	}
+	controllerutils.Run(ctx, ControllerName, logger.V(3), c.queue, workers, maxRetries, c.reconcile)
+}
+
+func (c *controller) reconcile(ctx context.Context, logger logr.Logger, key, namespace, name string) error {
+	if namespace != config.KyvernoNamespace() {
+		return nil
+	}
+	if name != tls.GenerateTLSPairSecretName() && name != tls.GenerateRootCASecretName() {
+		return nil
+	}
+	return c.renewCertificates()
+}
+
+func (c *controller) ticker(ctx context.Context) {
 	certsRenewalTicker := time.NewTicker(tls.CertRenewalInterval)
 	defer certsRenewalTicker.Stop()
 	for {
 		select {
 		case <-certsRenewalTicker.C:
-			if err := m.renewCertificates(); err != nil {
-				logger.Error(err, "unable to renew certificates, force restarting")
-				os.Exit(1)
-			}
-		case <-m.secretQueue:
-			if err := m.renewCertificates(); err != nil {
-				logger.Error(err, "unable to renew certificates, force restarting")
-				os.Exit(1)
+			list, err := c.secretLister.List(labels.Everything())
+			if err == nil {
+				for _, secret := range list {
+					if err := c.secretEnqueue(secret); err != nil {
+						logger.Error(err, "failed to enqueue secret", "name", secret.Name)
+					}
+				}
+			} else {
+				logger.Error(err, "falied to list secrets")
 			}
 		case <-ctx.Done():
-			logger.V(2).Info("stopping cert renewer")
 			return
 		}
 	}
 }
 
-func (m *controller) addSecretFunc(obj interface{}) {
-	secret := obj.(*corev1.Secret)
-	if secret.GetNamespace() == config.KyvernoNamespace() && secret.GetName() == tls.GenerateTLSPairSecretName() {
-		m.secretQueue <- true
-	}
-}
-
-func (m *controller) updateSecretFunc(oldObj interface{}, newObj interface{}) {
-	old := oldObj.(*corev1.Secret)
-	new := newObj.(*corev1.Secret)
-	if new.GetNamespace() == config.KyvernoNamespace() && new.GetName() == tls.GenerateTLSPairSecretName() {
-		if !reflect.DeepEqual(old.DeepCopy().Data, new.DeepCopy().Data) {
-			m.secretQueue <- true
-			logger.V(4).Info("secret updated, reconciling webhook configurations")
-		}
-	}
-}
-
-func (m *controller) renewCertificates() error {
-	if err := common.RetryFunc(time.Second, 5*time.Second, m.renewer.RenewCA, "failed to renew CA", logger)(); err != nil {
+func (c *controller) renewCertificates() error {
+	if err := common.RetryFunc(time.Second, 5*time.Second, c.renewer.RenewCA, "failed to renew CA", logger)(); err != nil {
 		return err
 	}
-	if err := common.RetryFunc(time.Second, 5*time.Second, m.renewer.RenewTLS, "failed to renew TLS", logger)(); err != nil {
+	if err := common.RetryFunc(time.Second, 5*time.Second, c.renewer.RenewTLS, "failed to renew TLS", logger)(); err != nil {
 		return err
 	}
 	return nil