1
0
Fork 0
mirror of https://github.com/external-secrets/external-secrets.git synced 2024-12-14 11:57:59 +00:00

feat: fix cert-controller readiness probe (#2857)

readiness probes are being executed independently from the
leader election status. The current implementation depends on
leader election (client cache etc.) to run properly.
This commit fixes that by short-circuiting the readiness probes
when the mgr is not the leader.

This bug surfaces when `leader-election=true` and cert-controller `replicas>=2`.

Signed-off-by: Moritz Johner <beller.moritz@googlemail.com>
This commit is contained in:
Moritz Johner 2023-11-07 09:51:27 +01:00 committed by GitHub
parent e0c1d93f9b
commit f5cd6816aa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 84 additions and 38 deletions

View file

@ -87,7 +87,8 @@ var certcontrollerCmd = &cobra.Command{
setupLog.Error(err, "unable to start manager")
os.Exit(1)
}
crdctrl := crds.New(mgr.GetClient(), mgr.GetScheme(),
crdctrl := crds.New(mgr.GetClient(), mgr.GetScheme(), mgr.Elected(),
ctrl.Log.WithName("controllers").WithName("webhook-certs-updater"),
crdRequeueInterval, serviceName, serviceNamespace, secretName, secretNamespace, crdNames)
if err := crdctrl.SetupWithManager(mgr, controller.Options{
@ -97,7 +98,7 @@ var certcontrollerCmd = &cobra.Command{
os.Exit(1)
}
whc := webhookconfig.New(mgr.GetClient(), mgr.GetScheme(),
whc := webhookconfig.New(mgr.GetClient(), mgr.GetScheme(), mgr.Elected(),
ctrl.Log.WithName("controllers").WithName("webhook-certs-updater"),
serviceName, serviceNamespace,
secretName, secretNamespace, crdRequeueInterval)

View file

@ -72,11 +72,14 @@ type Reconciler struct {
RequeueInterval time.Duration
// the controller is ready when all crds are injected
rdyMu *sync.Mutex
// and the controller is elected as leader
leaderChan <-chan struct{}
leaderElected bool
readyStatusMapMu *sync.Mutex
readyStatusMap map[string]bool
}
func New(k8sClient client.Client, scheme *runtime.Scheme, logger logr.Logger,
func New(k8sClient client.Client, scheme *runtime.Scheme, leaderChan <-chan struct{}, logger logr.Logger,
interval time.Duration, svcName, svcNamespace, secretName, secretNamespace string, resources []string) *Reconciler {
return &Reconciler{
Client: k8sClient,
@ -90,7 +93,8 @@ func New(k8sClient client.Client, scheme *runtime.Scheme, logger logr.Logger,
CrdResources: resources,
CAName: "external-secrets",
CAOrganization: "external-secrets",
rdyMu: &sync.Mutex{},
leaderChan: leaderChan,
readyStatusMapMu: &sync.Mutex{},
readyStatusMap: map[string]bool{},
}
}
@ -117,14 +121,14 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
err := r.updateCRD(ctx, req)
if err != nil {
log.Error(err, "failed to inject conversion webhook")
r.rdyMu.Lock()
r.readyStatusMapMu.Lock()
r.readyStatusMap[req.NamespacedName.Name] = false
r.rdyMu.Unlock()
r.readyStatusMapMu.Unlock()
return ctrl.Result{}, err
}
r.rdyMu.Lock()
r.readyStatusMapMu.Lock()
r.readyStatusMap[req.NamespacedName.Name] = true
r.rdyMu.Unlock()
r.readyStatusMapMu.Unlock()
}
return ctrl.Result{RequeueAfter: r.RequeueInterval}, nil
}
@ -132,14 +136,35 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
// ReadyCheck reviews if all webhook configs have been injected into the CRDs
// and if the referenced webhook service is ready.
func (r *Reconciler) ReadyCheck(_ *http.Request) error {
// skip readiness check if we're not leader
// as we depend on caches and being able to reconcile Webhooks
if !r.leaderElected {
select {
case <-r.leaderChan:
r.leaderElected = true
default:
return nil
}
}
if err := r.checkCRDs(); err != nil {
return err
}
return r.checkEndpoints()
}
func (r Reconciler) checkCRDs() error {
for _, res := range r.CrdResources {
r.rdyMu.Lock()
r.readyStatusMapMu.Lock()
rdy := r.readyStatusMap[res]
r.rdyMu.Unlock()
r.readyStatusMapMu.Unlock()
if !rdy {
return fmt.Errorf(errResNotReady, res)
}
}
return nil
}
func (r Reconciler) checkEndpoints() error {
var eps corev1.Endpoints
err := r.Get(context.TODO(), types.NamespacedName{
Name: r.SvcName,

View file

@ -77,7 +77,9 @@ var _ = BeforeSuite(func() {
Expect(err).ToNot(HaveOccurred())
Expect(k8sClient).ToNot(BeNil())
rec := New(k8sClient, k8sManager.GetScheme(), log, time.Second*1,
leaderChan := make(chan struct{})
close(leaderChan)
rec := New(k8sClient, k8sManager.GetScheme(), leaderChan, log, time.Second*1,
"foo", "default", "foo", "default", []string{
"secretstores.test.io",
})

View file

@ -84,8 +84,9 @@ var _ = BeforeSuite(func() {
k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
Expect(err).ToNot(HaveOccurred())
Expect(k8sClient).ToNot(BeNil())
reconciler = New(k8sClient, k8sManager.GetScheme(), ctrl.Log, ctrlSvcName, ctrlSvcNamespace, ctrlSecretName, ctrlSecretNamespace, time.Second)
leaderChan := make(chan struct{})
close(leaderChan)
reconciler = New(k8sClient, k8sManager.GetScheme(), leaderChan, ctrl.Log, ctrlSvcName, ctrlSvcNamespace, ctrlSecretName, ctrlSecretNamespace, time.Second)
reconciler.SetupWithManager(k8sManager, controller.Options{})
Expect(err).ToNot(HaveOccurred())

View file

@ -46,11 +46,16 @@ type Reconciler struct {
SecretName string
SecretNamespace string
rdyMu *sync.Mutex
ready bool
// store state for the readiness probe.
// we're ready when we're not the leader or
// if we've reconciled the webhook config when we're the leader.
leaderChan <-chan struct{}
leaderElected bool
webhookReadyMu *sync.Mutex
webhookReady bool
}
func New(k8sClient client.Client, scheme *runtime.Scheme,
func New(k8sClient client.Client, scheme *runtime.Scheme, leaderChan <-chan struct{},
log logr.Logger, svcName, svcNamespace, secretName, secretNamespace string,
requeueInterval time.Duration) *Reconciler {
return &Reconciler{
@ -62,8 +67,10 @@ func New(k8sClient client.Client, scheme *runtime.Scheme,
SvcNamespace: svcNamespace,
SecretName: secretName,
SecretNamespace: secretNamespace,
rdyMu: &sync.Mutex{},
ready: false,
leaderChan: leaderChan,
leaderElected: false,
webhookReadyMu: &sync.Mutex{},
webhookReady: false,
}
}
@ -109,9 +116,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
// right now we only have one single
// webhook config we care about
r.rdyMu.Lock()
defer r.rdyMu.Unlock()
r.ready = true
r.webhookReadyMu.Lock()
defer r.webhookReadyMu.Unlock()
r.webhookReady = true
return ctrl.Result{
RequeueAfter: r.RequeueDuration,
}, nil
@ -126,9 +133,19 @@ func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, opts controller.Options)
}
func (r *Reconciler) ReadyCheck(_ *http.Request) error {
r.rdyMu.Lock()
defer r.rdyMu.Unlock()
if !r.ready {
// skip readiness check if we're not leader
// as we depend on caches and being able to reconcile Webhooks
if !r.leaderElected {
select {
case <-r.leaderChan:
r.leaderElected = true
default:
return nil
}
}
r.webhookReadyMu.Lock()
defer r.webhookReadyMu.Unlock()
if !r.webhookReady {
return fmt.Errorf(errWebhookNotReady)
}
var eps v1.Endpoints