mirror of
https://github.com/arangodb/kube-arangodb.git
synced 2024-12-14 11:57:37 +00:00
[Bugfix] Ensure PDBs Consistency (#1221)
This commit is contained in:
parent
880ae2384b
commit
a9f82d28b3
3 changed files with 87 additions and 88 deletions
|
@ -6,6 +6,7 @@
|
|||
- (Bugfix) Recover from locked ShuttingDown state
|
||||
- (Feature) Add tolerations runtime rotation
|
||||
- (Feature) Promote Version Check Feature
|
||||
- (Bugfix) Ensure PDBs Consistency
|
||||
|
||||
## [1.2.22](https://github.com/arangodb/kube-arangodb/tree/1.2.22) (2022-12-13)
|
||||
- (Bugfix) Do not manage ports in managed ExternalAccess mode
|
||||
|
|
|
@ -411,6 +411,8 @@ func (i *inspectorState) refreshInThreads(ctx context.Context, threads int, load
|
|||
|
||||
i.throttles = n.throttles
|
||||
|
||||
i.versionInfo = n.versionInfo
|
||||
|
||||
i.last = time.Now()
|
||||
i.initialised = true
|
||||
|
||||
|
|
|
@ -23,7 +23,6 @@ package resources
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
policyv1 "k8s.io/api/policy/v1"
|
||||
policyv1beta1 "k8s.io/api/policy/v1beta1"
|
||||
|
@ -35,7 +34,6 @@ import (
|
|||
"github.com/arangodb/kube-arangodb/pkg/util/globals"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/kerrors"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/timer"
|
||||
)
|
||||
|
||||
func min(a int, b int) int {
|
||||
|
@ -50,36 +48,45 @@ func (r *Resources) EnsurePDBs(ctx context.Context) error {
|
|||
|
||||
// Only in Cluster and Production Mode
|
||||
spec := r.context.GetSpec()
|
||||
status := r.context.GetStatus()
|
||||
if spec.IsProduction() && spec.GetMode().IsCluster() {
|
||||
|
||||
// We want to lose at most one agent and dbserver.
|
||||
// Coordinators are not that critical. To keep the service available two should be enough
|
||||
minAgents := spec.GetServerGroupSpec(api.ServerGroupAgents).GetCount() - 1
|
||||
currAgents := status.Members.Agents.MembersReady()
|
||||
|
||||
minDBServers := spec.GetServerGroupSpec(api.ServerGroupDBServers).GetCount() - 1
|
||||
currDBServers := status.Members.DBServers.MembersReady()
|
||||
|
||||
minCoordinators := min(spec.GetServerGroupSpec(api.ServerGroupCoordinators).GetCount()-1, 2)
|
||||
currCoordinators := status.Members.Coordinators.MembersReady()
|
||||
|
||||
// Setting those to zero triggers a remove of the PDB
|
||||
minSyncMaster := 0
|
||||
minSyncWorker := 0
|
||||
minSyncMaster, currSyncMaster := 0, 0
|
||||
minSyncWorker, currSyncWorker := 0, 0
|
||||
if r.context.IsSyncEnabled() {
|
||||
minSyncMaster = spec.GetServerGroupSpec(api.ServerGroupSyncMasters).GetCount() - 1
|
||||
currSyncMaster = status.Members.SyncMasters.MembersReady()
|
||||
|
||||
minSyncWorker = spec.GetServerGroupSpec(api.ServerGroupSyncWorkers).GetCount() - 1
|
||||
currSyncWorker = status.Members.SyncWorkers.MembersReady()
|
||||
}
|
||||
|
||||
// Ensure all PDBs as calculated
|
||||
if err := r.ensurePDBForGroup(ctx, api.ServerGroupAgents, minAgents); err != nil {
|
||||
if err := r.ensurePDBForGroup(ctx, api.ServerGroupAgents, minAgents, currAgents); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.ensurePDBForGroup(ctx, api.ServerGroupDBServers, minDBServers); err != nil {
|
||||
if err := r.ensurePDBForGroup(ctx, api.ServerGroupDBServers, minDBServers, currDBServers); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.ensurePDBForGroup(ctx, api.ServerGroupCoordinators, minCoordinators); err != nil {
|
||||
if err := r.ensurePDBForGroup(ctx, api.ServerGroupCoordinators, minCoordinators, currCoordinators); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.ensurePDBForGroup(ctx, api.ServerGroupSyncMasters, minSyncMaster); err != nil {
|
||||
if err := r.ensurePDBForGroup(ctx, api.ServerGroupSyncMasters, minSyncMaster, currSyncMaster); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.ensurePDBForGroup(ctx, api.ServerGroupSyncWorkers, minSyncWorker); err != nil {
|
||||
if err := r.ensurePDBForGroup(ctx, api.ServerGroupSyncWorkers, minSyncWorker, currSyncWorker); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -122,7 +129,7 @@ func newPDBV1(minAvail int, deplname string, group api.ServerGroup, owner meta.O
|
|||
}
|
||||
|
||||
// ensurePDBForGroup ensure pdb for a specific server group, if wantMinAvail is zero or less, the PDB is removed and not recreated
|
||||
func (r *Resources) ensurePDBForGroup(ctx context.Context, group api.ServerGroup, wantedMinAvail int) error {
|
||||
func (r *Resources) ensurePDBForGroup(ctx context.Context, group api.ServerGroup, wantedMinAvail, current int) error {
|
||||
if wantedMinAvail < 0 {
|
||||
// Enforce removal
|
||||
wantedMinAvail = 0
|
||||
|
@ -134,101 +141,90 @@ func (r *Resources) ensurePDBForGroup(ctx context.Context, group api.ServerGroup
|
|||
cache := r.context.ACS().CurrentClusterCache()
|
||||
pdbMod := cache.PodDisruptionBudgetsModInterface()
|
||||
|
||||
for {
|
||||
var minAvailable *intstr.IntOrString
|
||||
var deletionTimestamp *meta.Time
|
||||
var minAvailable *intstr.IntOrString
|
||||
var deletionTimestamp *meta.Time
|
||||
|
||||
err := globals.GetGlobalTimeouts().Kubernetes().RunWithTimeout(ctx, func(ctxChild context.Context) error {
|
||||
if inspector, err := cache.PodDisruptionBudget().V1(); err == nil {
|
||||
if pdb, err := inspector.Read().Get(ctxChild, pdbName, meta.GetOptions{}); err != nil {
|
||||
return err
|
||||
} else {
|
||||
minAvailable = pdb.Spec.MinAvailable
|
||||
deletionTimestamp = pdb.GetDeletionTimestamp()
|
||||
}
|
||||
} else if inspector, err := cache.PodDisruptionBudget().V1Beta1(); err == nil {
|
||||
if pdb, err := inspector.Read().Get(ctxChild, pdbName, meta.GetOptions{}); err != nil {
|
||||
return err
|
||||
} else {
|
||||
minAvailable = pdb.Spec.MinAvailable
|
||||
deletionTimestamp = pdb.GetDeletionTimestamp()
|
||||
}
|
||||
err := globals.GetGlobalTimeouts().Kubernetes().RunWithTimeout(ctx, func(ctxChild context.Context) error {
|
||||
if inspector, err := cache.PodDisruptionBudget().V1(); err == nil {
|
||||
if pdb, err := inspector.Read().Get(ctxChild, pdbName, meta.GetOptions{}); err != nil {
|
||||
return err
|
||||
} else {
|
||||
return errors.WithStack(err)
|
||||
minAvailable = pdb.Spec.MinAvailable
|
||||
deletionTimestamp = pdb.GetDeletionTimestamp()
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if kerrors.IsNotFound(err) {
|
||||
if wantedMinAvail != 0 {
|
||||
// No PDB found - create new.
|
||||
log.Debug("Creating new PDB")
|
||||
err = globals.GetGlobalTimeouts().Kubernetes().RunWithTimeout(ctx, func(ctxChild context.Context) error {
|
||||
var errInternal error
|
||||
|
||||
if cache.PodDisruptionBudget().Version().IsV1() {
|
||||
pdb := newPDBV1(wantedMinAvail, deplName, group, r.context.GetAPIObject().AsOwner())
|
||||
_, errInternal = pdbMod.V1().Create(ctxChild, pdb, meta.CreateOptions{})
|
||||
} else {
|
||||
pdb := newPDBV1Beta1(wantedMinAvail, deplName, group, r.context.GetAPIObject().AsOwner())
|
||||
_, errInternal = pdbMod.V1Beta1().Create(ctxChild, pdb, meta.CreateOptions{})
|
||||
}
|
||||
|
||||
return errInternal
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.Err(err).Error("failed to create PDB")
|
||||
return errors.WithStack(err)
|
||||
}
|
||||
} else if inspector, err := cache.PodDisruptionBudget().V1Beta1(); err == nil {
|
||||
if pdb, err := inspector.Read().Get(ctxChild, pdbName, meta.GetOptions{}); err != nil {
|
||||
return err
|
||||
} else {
|
||||
minAvailable = pdb.Spec.MinAvailable
|
||||
deletionTimestamp = pdb.GetDeletionTimestamp()
|
||||
}
|
||||
|
||||
return nil
|
||||
} else if err != nil {
|
||||
// Some other error than not found.
|
||||
} else {
|
||||
return errors.WithStack(err)
|
||||
}
|
||||
|
||||
// PDB v1 or v1beta1 is here.
|
||||
if minAvailable.IntValue() == wantedMinAvail && wantedMinAvail != 0 {
|
||||
return nil
|
||||
}
|
||||
// Update for PDBs is forbidden, thus one has to delete it and then create it again
|
||||
// Otherwise delete it if wantedMinAvail is zero
|
||||
log.Int("wanted-min-avail", wantedMinAvail).
|
||||
Int("current-min-avail", minAvailable.IntValue()).
|
||||
Debug("Recreating PDB")
|
||||
return nil
|
||||
})
|
||||
|
||||
if kerrors.IsNotFound(err) {
|
||||
if wantedMinAvail != 0 && wantedMinAvail < current {
|
||||
// No PDB found - create new.
|
||||
log.Debug("Creating new PDB")
|
||||
err = globals.GetGlobalTimeouts().Kubernetes().RunWithTimeout(ctx, func(ctxChild context.Context) error {
|
||||
var errInternal error
|
||||
|
||||
// Trigger deletion only if not already deleted.
|
||||
if deletionTimestamp == nil {
|
||||
// Update the PDB.
|
||||
err := globals.GetGlobalTimeouts().Kubernetes().RunWithTimeout(ctx, func(ctxChild context.Context) error {
|
||||
if cache.PodDisruptionBudget().Version().IsV1() {
|
||||
return pdbMod.V1().Delete(ctxChild, pdbName, meta.DeleteOptions{})
|
||||
pdb := newPDBV1(wantedMinAvail, deplName, group, r.context.GetAPIObject().AsOwner())
|
||||
_, errInternal = pdbMod.V1().Create(ctxChild, pdb, meta.CreateOptions{})
|
||||
} else {
|
||||
pdb := newPDBV1Beta1(wantedMinAvail, deplName, group, r.context.GetAPIObject().AsOwner())
|
||||
_, errInternal = pdbMod.V1Beta1().Create(ctxChild, pdb, meta.CreateOptions{})
|
||||
}
|
||||
|
||||
return pdbMod.V1Beta1().Delete(ctxChild, pdbName, meta.DeleteOptions{})
|
||||
return errInternal
|
||||
})
|
||||
if err != nil && !kerrors.IsNotFound(err) {
|
||||
log.Err(err).Error("PDB deletion failed")
|
||||
|
||||
if err != nil {
|
||||
log.Err(err).Error("failed to create PDB")
|
||||
return errors.WithStack(err)
|
||||
}
|
||||
} else {
|
||||
log.Debug("PDB already deleted")
|
||||
}
|
||||
// Exit here if deletion was intended
|
||||
if wantedMinAvail == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Debug("Retry loop for PDB")
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-timer.After(time.Second):
|
||||
}
|
||||
return nil
|
||||
} else if err != nil {
|
||||
// Some other error than not found.
|
||||
return errors.WithStack(err)
|
||||
}
|
||||
|
||||
// PDB v1 or v1beta1 is here.
|
||||
if minAvailable.IntValue() == wantedMinAvail && wantedMinAvail != 0 {
|
||||
return nil
|
||||
}
|
||||
// Update for PDBs is forbidden, thus one has to delete it and then create it again
|
||||
// Otherwise delete it if wantedMinAvail is zero
|
||||
log.Int("wanted-min-avail", wantedMinAvail).
|
||||
Int("current-min-avail", minAvailable.IntValue()).
|
||||
Debug("Recreating PDB")
|
||||
|
||||
// Trigger deletion only if not already deleted.
|
||||
if deletionTimestamp == nil {
|
||||
// Update the PDB.
|
||||
err := globals.GetGlobalTimeouts().Kubernetes().RunWithTimeout(ctx, func(ctxChild context.Context) error {
|
||||
if cache.PodDisruptionBudget().Version().IsV1() {
|
||||
return pdbMod.V1().Delete(ctxChild, pdbName, meta.DeleteOptions{})
|
||||
}
|
||||
|
||||
return pdbMod.V1Beta1().Delete(ctxChild, pdbName, meta.DeleteOptions{})
|
||||
})
|
||||
if err != nil && !kerrors.IsNotFound(err) {
|
||||
log.Err(err).Error("PDB deletion failed")
|
||||
return errors.WithStack(err)
|
||||
}
|
||||
} else {
|
||||
log.Debug("PDB already deleted")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func newFromInt(v int) *intstr.IntOrString {
|
||||
|
|
Loading…
Reference in a new issue