diff --git a/CHANGELOG.md b/CHANGELOG.md index f1233d4bd..f5d13e522 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ - (Bugfix) Fix 3.6 -> 3.7 Upgrade procedure - (Bugfix) Add missing finalizer - (Bugfix) Add graceful to kill command +- (Bugfix) Add reachable condition to deployment. Mark as UpToDate only of cluster is reachable. +- (Bugfix) Add toleration's for network failures in action start procedure ## [1.2.7](https://github.com/arangodb/kube-arangodb/tree/1.2.7) (2022-01-17) - Add Plan BackOff functionality diff --git a/pkg/apis/deployment/v1/conditions.go b/pkg/apis/deployment/v1/conditions.go index 1d72bfd7e..d3d540621 100644 --- a/pkg/apis/deployment/v1/conditions.go +++ b/pkg/apis/deployment/v1/conditions.go @@ -91,6 +91,9 @@ const ( // ConditionTypeTopologyAware indicates that the member is deployed with TopologyAwareness. ConditionTypeTopologyAware ConditionType = "TopologyAware" + // ConditionTypePVCResizePending indicates that the member has to be restarted due to PVC Resized pending action + ConditionTypePVCResizePending ConditionType = "PVCResizePending" + // ConditionTypeLicenseSet indicates that license V2 is set on cluster. ConditionTypeLicenseSet ConditionType = "LicenseSet" ) diff --git a/pkg/apis/deployment/v2alpha1/conditions.go b/pkg/apis/deployment/v2alpha1/conditions.go index ea1aefbea..0de54f061 100644 --- a/pkg/apis/deployment/v2alpha1/conditions.go +++ b/pkg/apis/deployment/v2alpha1/conditions.go @@ -91,6 +91,9 @@ const ( // ConditionTypeTopologyAware indicates that the member is deployed with TopologyAwareness. ConditionTypeTopologyAware ConditionType = "TopologyAware" + // ConditionTypePVCResizePending indicates that the member has to be restarted due to PVC Resized pending action + ConditionTypePVCResizePending ConditionType = "PVCResizePending" + // ConditionTypeLicenseSet indicates that license V2 is set on cluster. ConditionTypeLicenseSet ConditionType = "LicenseSet" ) diff --git a/pkg/deployment/cleanup.go b/pkg/deployment/cleanup.go index 3e484df15..325e14ad2 100644 --- a/pkg/deployment/cleanup.go +++ b/pkg/deployment/cleanup.go @@ -30,25 +30,31 @@ import ( "github.com/arangodb/kube-arangodb/pkg/deployment/resources/inspector" "github.com/arangodb/kube-arangodb/pkg/util" + "github.com/arangodb/kube-arangodb/pkg/util/constants" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector" ) // removePodFinalizers removes all finalizers from all pods owned by us. -func (d *Deployment) removePodFinalizers(ctx context.Context, cachedStatus inspectorInterface.Inspector) error { +func (d *Deployment) removePodFinalizers(ctx context.Context, cachedStatus inspectorInterface.Inspector) (bool, error) { log := d.deps.Log + found := false + if err := cachedStatus.IteratePods(func(pod *core.Pod) error { - if err := k8sutil.RemovePodFinalizers(ctx, cachedStatus, log, d.PodsModInterface(), pod, pod.GetFinalizers(), true); err != nil { + log.Info().Str("pod", pod.GetName()).Msgf("Removing Pod Finalizer") + if count, err := k8sutil.RemovePodFinalizers(ctx, cachedStatus, log, d.PodsModInterface(), pod, constants.ManagedFinalizers(), true); err != nil { log.Warn().Err(err).Msg("Failed to remove pod finalizers") return err + } else if count > 0 { + found = true } ctxChild, cancel := globals.GetGlobalTimeouts().Kubernetes().WithTimeout(ctx) defer cancel() if err := d.PodsModInterface().Delete(ctxChild, pod.GetName(), meta.DeleteOptions{ - GracePeriodSeconds: util.NewInt64(1), + GracePeriodSeconds: util.NewInt64(0), }); err != nil { if !k8sutil.IsNotFound(err) { log.Warn().Err(err).Msg("Failed to remove pod") @@ -57,25 +63,30 @@ func (d *Deployment) removePodFinalizers(ctx context.Context, cachedStatus inspe } return nil }, inspector.FilterPodsByLabels(k8sutil.LabelsForDeployment(d.GetName(), ""))); err != nil { - return err + return false, err } - return nil + return found, nil } // removePVCFinalizers removes all finalizers from all PVCs owned by us. -func (d *Deployment) removePVCFinalizers(ctx context.Context, cachedStatus inspectorInterface.Inspector) error { +func (d *Deployment) removePVCFinalizers(ctx context.Context, cachedStatus inspectorInterface.Inspector) (bool, error) { log := d.deps.Log + found := false + if err := cachedStatus.IteratePersistentVolumeClaims(func(pvc *core.PersistentVolumeClaim) error { - if err := k8sutil.RemovePVCFinalizers(ctx, cachedStatus, log, d.PersistentVolumeClaimsModInterface(), pvc, pvc.GetFinalizers(), true); err != nil { + log.Info().Str("pvc", pvc.GetName()).Msgf("Removing PVC Finalizer") + if count, err := k8sutil.RemovePVCFinalizers(ctx, cachedStatus, log, d.PersistentVolumeClaimsModInterface(), pvc, constants.ManagedFinalizers(), true); err != nil { log.Warn().Err(err).Msg("Failed to remove PVC finalizers") return err + } else if count > 0 { + found = true } return nil }, inspector.FilterPersistentVolumeClaimsByLabels(k8sutil.LabelsForDeployment(d.GetName(), ""))); err != nil { - return err + return false, err } - return nil + return found, nil } diff --git a/pkg/deployment/context_impl.go b/pkg/deployment/context_impl.go index d0f17017f..8730160ac 100644 --- a/pkg/deployment/context_impl.go +++ b/pkg/deployment/context_impl.go @@ -460,7 +460,7 @@ func (d *Deployment) RemovePodFinalizers(ctx context.Context, podName string) er return errors.WithStack(err) } - err = k8sutil.RemovePodFinalizers(ctx, d.GetCachedStatus(), log, d.PodsModInterface(), p, p.GetFinalizers(), true) + _, err = k8sutil.RemovePodFinalizers(ctx, d.GetCachedStatus(), log, d.PodsModInterface(), p, p.GetFinalizers(), true) if err != nil { return errors.WithStack(err) } diff --git a/pkg/deployment/deployment.go b/pkg/deployment/deployment.go index 159ee3442..e38293d5a 100644 --- a/pkg/deployment/deployment.go +++ b/pkg/deployment/deployment.go @@ -339,10 +339,10 @@ func (d *Deployment) run() { } // Remove finalizers from created resources log.Info().Msg("Deployment removed, removing finalizers to prevent orphaned resources") - if err := d.removePodFinalizers(context.TODO(), cachedStatus); err != nil { + if _, err := d.removePodFinalizers(context.TODO(), cachedStatus); err != nil { log.Warn().Err(err).Msg("Failed to remove Pod finalizers") } - if err := d.removePVCFinalizers(context.TODO(), cachedStatus); err != nil { + if _, err := d.removePVCFinalizers(context.TODO(), cachedStatus); err != nil { log.Warn().Err(err).Msg("Failed to remove PVC finalizers") } // We're being stopped. diff --git a/pkg/deployment/deployment_finalizers.go b/pkg/deployment/deployment_finalizers.go index cfbf03695..28b1c8b02 100644 --- a/pkg/deployment/deployment_finalizers.go +++ b/pkg/deployment/deployment_finalizers.go @@ -68,8 +68,10 @@ func (d *Deployment) runDeploymentFinalizers(ctx context.Context, cachedStatus i switch f { case constants.FinalizerDeplRemoveChildFinalizers: log.Debug().Msg("Inspecting 'remove child finalizers' finalizer") - if err := d.inspectRemoveChildFinalizers(ctx, log, updated, cachedStatus); err == nil { + if retry, err := d.inspectRemoveChildFinalizers(ctx, log, updated, cachedStatus); err == nil && !retry { removalList = append(removalList, f) + } else if retry { + log.Debug().Str("finalizer", f).Msg("Retry on finalizer removal") } else { log.Debug().Err(err).Str("finalizer", f).Msg("Cannot remove finalizer yet") } @@ -87,15 +89,21 @@ func (d *Deployment) runDeploymentFinalizers(ctx context.Context, cachedStatus i // inspectRemoveChildFinalizers checks the finalizer condition for remove-child-finalizers. // It returns nil if the finalizer can be removed. -func (d *Deployment) inspectRemoveChildFinalizers(ctx context.Context, _ zerolog.Logger, _ *api.ArangoDeployment, cachedStatus inspectorInterface.Inspector) error { - if err := d.removePodFinalizers(ctx, cachedStatus); err != nil { - return errors.WithStack(err) +func (d *Deployment) inspectRemoveChildFinalizers(ctx context.Context, _ zerolog.Logger, _ *api.ArangoDeployment, cachedStatus inspectorInterface.Inspector) (bool, error) { + retry := false + + if found, err := d.removePodFinalizers(ctx, cachedStatus); err != nil { + return false, errors.WithStack(err) + } else if found { + retry = true } - if err := d.removePVCFinalizers(ctx, cachedStatus); err != nil { - return errors.WithStack(err) + if found, err := d.removePVCFinalizers(ctx, cachedStatus); err != nil { + return false, errors.WithStack(err) + } else if found { + retry = true } - return nil + return retry, nil } // removeDeploymentFinalizers removes the given finalizers from the given PVC. @@ -125,7 +133,7 @@ func removeDeploymentFinalizers(ctx context.Context, log zerolog.Logger, cli ver return nil } ignoreNotFound := false - if err := k8sutil.RemoveFinalizers(log, finalizers, getFunc, updateFunc, ignoreNotFound); err != nil { + if _, err := k8sutil.RemoveFinalizers(log, finalizers, getFunc, updateFunc, ignoreNotFound); err != nil { return errors.WithStack(err) } return nil diff --git a/pkg/deployment/deployment_inspector.go b/pkg/deployment/deployment_inspector.go index ccec54b6d..437b66c32 100644 --- a/pkg/deployment/deployment_inspector.go +++ b/pkg/deployment/deployment_inspector.go @@ -274,6 +274,22 @@ func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterva return minInspectionInterval, nil } + // Reachable state ensurer + reachableConditionState := status.Conditions.Check(api.ConditionTypeReachable).Exists().IsTrue().Evaluate() + if d.State().IsReachable() { + if !reachableConditionState { + if err = d.updateConditionWithHash(ctx, api.ConditionTypeReachable, true, "ArangoDB is reachable", "", ""); err != nil { + return minInspectionInterval, errors.Wrapf(err, "Unable to update Reachable condition") + } + } + } else { + if reachableConditionState { + if err = d.updateConditionWithHash(ctx, api.ConditionTypeReachable, false, "ArangoDB is not reachable", "", ""); err != nil { + return minInspectionInterval, errors.Wrapf(err, "Unable to update Reachable condition") + } + } + } + if d.apiObject.Status.IsPlanEmpty() && status.AppliedVersion != checksum { if err := d.WithStatusUpdate(ctx, func(s *api.DeploymentStatus) bool { s.AppliedVersion = checksum @@ -284,7 +300,7 @@ func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterva return minInspectionInterval, nil } else if status.AppliedVersion == checksum { - isUpToDate, reason := d.isUpToDateStatus() + isUpToDate, reason := d.isUpToDateStatus(status) if !isUpToDate && status.Conditions.IsTrue(api.ConditionTypeUpToDate) { if err = d.updateConditionWithHash(ctx, api.ConditionTypeUpToDate, false, reason, "There are pending operations in plan or members are in restart process", checksum); err != nil { @@ -332,18 +348,31 @@ func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterva return } -func (d *Deployment) isUpToDateStatus() (upToDate bool, reason string) { - if !d.apiObject.Status.IsPlanEmpty() { +func (d *Deployment) isUpToDateStatus(status api.DeploymentStatus) (upToDate bool, reason string) { + if !status.IsPlanEmpty() { return false, "Plan is not empty" } upToDate = true - d.apiObject.Status.Members.ForeachServerGroup(func(group api.ServerGroup, list api.MemberStatusList) error { + if !status.Conditions.Check(api.ConditionTypeReachable).Exists().IsTrue().Evaluate() { + upToDate = false + } + + status.Members.ForeachServerGroup(func(group api.ServerGroup, list api.MemberStatusList) error { + if !upToDate { + return nil + } for _, member := range list { if member.Conditions.IsTrue(api.ConditionTypeRestart) || member.Conditions.IsTrue(api.ConditionTypePendingRestart) { upToDate = false reason = "Pending restarts on members" + return nil + } + if member.Conditions.IsTrue(api.ConditionTypePVCResizePending) { + upToDate = false + reason = "PVC is resizing" + return nil } } return nil diff --git a/pkg/deployment/member/phase_updates.go b/pkg/deployment/member/phase_updates.go index ba164281c..0e511b969 100644 --- a/pkg/deployment/member/phase_updates.go +++ b/pkg/deployment/member/phase_updates.go @@ -87,6 +87,7 @@ func removeMemberConditionsMapFunc(m *api.MemberStatus) { m.Conditions.Remove(api.ConditionTypeCleanedOut) m.Conditions.Remove(api.ConditionTypeTopologyAware) m.Conditions.Remove(api.MemberReplacementRequired) + m.Conditions.Remove(api.ConditionTypePVCResizePending) m.RemoveTerminationsBefore(time.Now().Add(-1 * recentTerminationsKeepPeriod)) diff --git a/pkg/deployment/member/state.go b/pkg/deployment/member/state.go index 2f43ca941..b8dee3a16 100644 --- a/pkg/deployment/member/state.go +++ b/pkg/deployment/member/state.go @@ -35,10 +35,12 @@ type StateInspector interface { RefreshState(ctx context.Context, members api.DeploymentStatusMemberElements) MemberState(id string) (State, bool) + State() State + Log(logger zerolog.Logger) } -func NewStateInspector(client reconciler.DeploymentMemberClient) StateInspector { +func NewStateInspector(client reconciler.DeploymentClient) StateInspector { return &stateInspector{ client: client, } @@ -49,7 +51,13 @@ type stateInspector struct { members map[string]State - client reconciler.DeploymentMemberClient + state State + + client reconciler.DeploymentClient +} + +func (s *stateInspector) State() State { + return s.state } func (s *stateInspector) Log(logger zerolog.Logger) { @@ -89,6 +97,23 @@ func (s *stateInspector) RefreshState(ctx context.Context, members api.Deploymen } }) + gctx, cancel := globals.GetGlobalTimeouts().ArangoDCheck().WithTimeout(ctx) + defer cancel() + + var cs State + + c, err := s.client.GetDatabaseClient(ctx) + if err != nil { + cs.Reachable = err + } else { + v, err := c.Version(gctx) + if err != nil { + cs.Reachable = err + } else { + cs.Version = v + } + } + current := map[string]State{} for id := range members { @@ -96,6 +121,7 @@ func (s *stateInspector) RefreshState(ctx context.Context, members api.Deploymen } s.members = current + s.state = cs } func (s *stateInspector) MemberState(id string) (State, bool) { diff --git a/pkg/deployment/reconcile/action.go b/pkg/deployment/reconcile/action.go index 552fb7784..7576bf3ff 100644 --- a/pkg/deployment/reconcile/action.go +++ b/pkg/deployment/reconcile/action.go @@ -173,9 +173,9 @@ func NewActionSuccess() ActionCore { return actionSuccess{} } -// Start always returns true. +// Start always returns false to start with progress. func (actionSuccess) Start(_ context.Context) (bool, error) { - return true, nil + return false, nil } // CheckProgress always returns true. diff --git a/pkg/deployment/reconcile/plan_builder_high.go b/pkg/deployment/reconcile/plan_builder_high.go index eccf1bec2..af3884096 100644 --- a/pkg/deployment/reconcile/plan_builder_high.go +++ b/pkg/deployment/reconcile/plan_builder_high.go @@ -53,6 +53,7 @@ func createHighPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil.A ApplyIfEmpty(updateMemberUpdateConditionsPlan). ApplyIfEmpty(updateMemberRotationConditionsPlan). ApplyIfEmpty(createMemberRecreationConditionsPlan). + ApplyIfEmpty(createRotateServerStoragePVCPendingResizeConditionPlan). ApplyIfEmpty(createTopologyMemberUpdatePlan). ApplyIfEmptyWithBackOff(LicenseCheck, 30*time.Second, updateClusterLicense). ApplyIfEmpty(createTopologyMemberConditionPlan). diff --git a/pkg/deployment/reconcile/plan_builder_storage.go b/pkg/deployment/reconcile/plan_builder_storage.go index 190521f10..240db550d 100644 --- a/pkg/deployment/reconcile/plan_builder_storage.go +++ b/pkg/deployment/reconcile/plan_builder_storage.go @@ -86,6 +86,36 @@ func createRotateServerStorageResizePlan(ctx context.Context, return plan } +func createRotateServerStoragePVCPendingResizeConditionPlan(ctx context.Context, + log zerolog.Logger, apiObject k8sutil.APIObject, + spec api.DeploymentSpec, status api.DeploymentStatus, + cachedStatus inspectorInterface.Inspector, context PlanBuilderContext) api.Plan { + var plan api.Plan + for _, i := range status.Members.AsList() { + if i.Member.PersistentVolumeClaimName == "" { + continue + } + + pvc, exists := cachedStatus.PersistentVolumeClaim(i.Member.PersistentVolumeClaimName) + if !exists { + continue + } + + pvcResizePending := k8sutil.IsPersistentVolumeClaimFileSystemResizePending(pvc) + pvcResizePendingCond := i.Member.Conditions.IsTrue(api.ConditionTypePVCResizePending) + + if pvcResizePending != pvcResizePendingCond { + if pvcResizePending { + plan = append(plan, updateMemberConditionActionV2("PVC Resize pending", api.ConditionTypePVCResizePending, i.Group, i.Member.ID, true, "PVC Resize pending", "", "")) + } else { + plan = append(plan, removeMemberConditionActionV2("PVC Resize is done", api.ConditionTypePVCResizePending, i.Group, i.Member.ID)) + } + } + } + + return plan +} + func pvcResizePlan(log zerolog.Logger, group api.ServerGroup, groupSpec api.ServerGroupSpec, member api.MemberStatus) api.Plan { mode := groupSpec.VolumeResizeMode.Get() switch mode { diff --git a/pkg/deployment/reconcile/plan_builder_test.go b/pkg/deployment/reconcile/plan_builder_test.go index 570436c33..12336427e 100644 --- a/pkg/deployment/reconcile/plan_builder_test.go +++ b/pkg/deployment/reconcile/plan_builder_test.go @@ -39,9 +39,9 @@ import ( "k8s.io/client-go/kubernetes" "github.com/arangodb/arangosync-client/client" + "github.com/arangodb/go-driver" "github.com/arangodb/go-driver/agency" - "github.com/arangodb/go-driver" backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" "github.com/arangodb/kube-arangodb/pkg/deployment/actions" @@ -1007,7 +1007,7 @@ func TestCreatePlan(t *testing.T) { ad.Status.Members.Agents[0].PersistentVolumeClaimName = "pvc_test" }, ExpectedHighPlan: []api.Action{ - actions.NewAction(api.ActionTypeSetMemberCondition, api.ServerGroupAgents, deploymentTemplate.Status.Members.Agents[0], "PVC Resize pending"), + actions.NewAction(api.ActionTypeSetMemberConditionV2, api.ServerGroupAgents, deploymentTemplate.Status.Members.Agents[0], "PVC Resize pending"), }, ExpectedLog: "PVC Resize pending", }, diff --git a/pkg/deployment/resources/pod_finalizers.go b/pkg/deployment/resources/pod_finalizers.go index b30df98d0..94e772a3b 100644 --- a/pkg/deployment/resources/pod_finalizers.go +++ b/pkg/deployment/resources/pod_finalizers.go @@ -117,7 +117,7 @@ func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatu } // Remove finalizers (if needed) if len(removalList) > 0 { - if err := k8sutil.RemovePodFinalizers(ctx, r.context.GetCachedStatus(), log, r.context.PodsModInterface(), p, removalList, false); err != nil { + if _, err := k8sutil.RemovePodFinalizers(ctx, r.context.GetCachedStatus(), log, r.context.PodsModInterface(), p, removalList, false); err != nil { log.Debug().Err(err).Msg("Failed to update pod (to remove finalizers)") return 0, errors.WithStack(err) } diff --git a/pkg/deployment/resources/pod_inspector.go b/pkg/deployment/resources/pod_inspector.go index 323908ed3..6c7bb945b 100644 --- a/pkg/deployment/resources/pod_inspector.go +++ b/pkg/deployment/resources/pod_inspector.go @@ -86,7 +86,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter // Strange, pod belongs to us, but we have no member for it. // Remove all finalizers, so it can be removed. log.Warn().Msg("Pod belongs to this deployment, but we don't know the member. Removing all finalizers") - err := k8sutil.RemovePodFinalizers(ctx, r.context.GetCachedStatus(), log, r.context.PodsModInterface(), pod, pod.GetFinalizers(), false) + _, err := k8sutil.RemovePodFinalizers(ctx, r.context.GetCachedStatus(), log, r.context.PodsModInterface(), pod, pod.GetFinalizers(), false) if err != nil { log.Debug().Err(err).Msg("Failed to update pod (to remove all finalizers)") return errors.WithStack(err) @@ -218,6 +218,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter } // End of Topology labels + // Reachable state if state, ok := r.context.MemberState(memberStatus.ID); ok { if state.IsReachable() { if memberStatus.Conditions.Update(api.ConditionTypeReachable, true, "ArangoDB is reachable", "") { diff --git a/pkg/deployment/resources/pvc_finalizers.go b/pkg/deployment/resources/pvc_finalizers.go index e86904723..7c0245241 100644 --- a/pkg/deployment/resources/pvc_finalizers.go +++ b/pkg/deployment/resources/pvc_finalizers.go @@ -60,7 +60,7 @@ func (r *Resources) runPVCFinalizers(ctx context.Context, p *v1.PersistentVolume } // Remove finalizers (if needed) if len(removalList) > 0 { - err := k8sutil.RemovePVCFinalizers(ctx, r.context.GetCachedStatus(), log, r.context.PersistentVolumeClaimsModInterface(), p, removalList, false) + _, err := k8sutil.RemovePVCFinalizers(ctx, r.context.GetCachedStatus(), log, r.context.PersistentVolumeClaimsModInterface(), p, removalList, false) if err != nil { log.Debug().Err(err).Msg("Failed to update PVC (to remove finalizers)") return 0, errors.WithStack(err) diff --git a/pkg/deployment/resources/pvc_inspector.go b/pkg/deployment/resources/pvc_inspector.go index 0c631f037..a35a718fa 100644 --- a/pkg/deployment/resources/pvc_inspector.go +++ b/pkg/deployment/resources/pvc_inspector.go @@ -67,7 +67,7 @@ func (r *Resources) InspectPVCs(ctx context.Context, cachedStatus inspectorInter // Strange, pvc belongs to us, but we have no member for it. // Remove all finalizers, so it can be removed. log.Warn().Msg("PVC belongs to this deployment, but we don't know the member. Removing all finalizers") - err := k8sutil.RemovePVCFinalizers(ctx, r.context.GetCachedStatus(), log, r.context.PersistentVolumeClaimsModInterface(), pvc, pvc.GetFinalizers(), false) + _, err := k8sutil.RemovePVCFinalizers(ctx, r.context.GetCachedStatus(), log, r.context.PersistentVolumeClaimsModInterface(), pvc, pvc.GetFinalizers(), false) if err != nil { log.Debug().Err(err).Msg("Failed to update PVC (to remove all finalizers)") return errors.WithStack(err) diff --git a/pkg/deployment/rotation/check.go b/pkg/deployment/rotation/check.go index e765090c1..3314bf1c7 100644 --- a/pkg/deployment/rotation/check.go +++ b/pkg/deployment/rotation/check.go @@ -21,11 +21,12 @@ package rotation import ( + "time" + "github.com/arangodb/kube-arangodb/pkg/apis/deployment" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" "github.com/arangodb/kube-arangodb/pkg/handlers/utils" "github.com/arangodb/kube-arangodb/pkg/util/constants" - "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector" "github.com/rs/zerolog" core "k8s.io/api/core/v1" @@ -117,13 +118,10 @@ func IsRotationRequired(log zerolog.Logger, cachedStatus inspectorInterface.Insp return } - pvc, exists := cachedStatus.PersistentVolumeClaim(member.PersistentVolumeClaimName) - if exists { - if k8sutil.IsPersistentVolumeClaimFileSystemResizePending(pvc) { - reason = "PVC Resize pending" - mode = EnforcedRotation - return - } + if member.Conditions.Check(api.ConditionTypePVCResizePending).Exists().LastTransition(3 * time.Minute).Evaluate() { + reason = "PVC Resize pending for more than 3 min" + mode = EnforcedRotation + return } if mode, plan, err := compare(log, spec, member, group, specTemplate, statusTemplate); err != nil { diff --git a/pkg/replication/finalizers.go b/pkg/replication/finalizers.go index ff1920e55..5c8733973 100644 --- a/pkg/replication/finalizers.go +++ b/pkg/replication/finalizers.go @@ -183,7 +183,7 @@ func removeDeploymentReplicationFinalizers(log zerolog.Logger, crcli versioned.I *p = *result return nil } - if err := k8sutil.RemoveFinalizers(log, finalizers, getFunc, updateFunc, ignoreNotFound); err != nil { + if _, err := k8sutil.RemoveFinalizers(log, finalizers, getFunc, updateFunc, ignoreNotFound); err != nil { return errors.WithStack(err) } return nil diff --git a/pkg/util/constants/constants.go b/pkg/util/constants/constants.go index 4d9bf0655..65104dafc 100644 --- a/pkg/util/constants/constants.go +++ b/pkg/util/constants/constants.go @@ -62,3 +62,15 @@ const ( LabelRole = "role" LabelRoleLeader = "leader" ) + +func ManagedFinalizers() []string { + return []string{ + FinalizerDeplRemoveChildFinalizers, + FinalizerDeplReplStopSync, + FinalizerPodAgencyServing, + FinalizerPodDrainDBServer, + FinalizerPodGracefulShutdown, + FinalizerPVCMemberExists, + FinalizerDelayPodTermination, + } +} diff --git a/pkg/util/k8sutil/finalizers.go b/pkg/util/k8sutil/finalizers.go index f417fc8e5..dc28e7e9f 100644 --- a/pkg/util/k8sutil/finalizers.go +++ b/pkg/util/k8sutil/finalizers.go @@ -40,7 +40,7 @@ const ( // RemovePodFinalizers removes the given finalizers from the given pod. func RemovePodFinalizers(ctx context.Context, cachedStatus pod.Inspector, log zerolog.Logger, c pod.ModInterface, p *core.Pod, - finalizers []string, ignoreNotFound bool) error { + finalizers []string, ignoreNotFound bool) (int, error) { getFunc := func() (metav1.Object, error) { ctxChild, cancel := globals.GetGlobalTimeouts().Kubernetes().WithTimeout(ctx) defer cancel() @@ -63,15 +63,16 @@ func RemovePodFinalizers(ctx context.Context, cachedStatus pod.Inspector, log ze *p = *result return nil } - if err := RemoveFinalizers(log, finalizers, getFunc, updateFunc, ignoreNotFound); err != nil { - return errors.WithStack(err) + if count, err := RemoveFinalizers(log, finalizers, getFunc, updateFunc, ignoreNotFound); err != nil { + return 0, errors.WithStack(err) + } else { + return count, nil } - return nil } // RemovePVCFinalizers removes the given finalizers from the given PVC. func RemovePVCFinalizers(ctx context.Context, cachedStatus persistentvolumeclaim.Inspector, log zerolog.Logger, c persistentvolumeclaim.ModInterface, - p *core.PersistentVolumeClaim, finalizers []string, ignoreNotFound bool) error { + p *core.PersistentVolumeClaim, finalizers []string, ignoreNotFound bool) (int, error) { getFunc := func() (metav1.Object, error) { ctxChild, cancel := globals.GetGlobalTimeouts().Kubernetes().WithTimeout(ctx) defer cancel() @@ -94,17 +95,18 @@ func RemovePVCFinalizers(ctx context.Context, cachedStatus persistentvolumeclaim *p = *result return nil } - if err := RemoveFinalizers(log, finalizers, getFunc, updateFunc, ignoreNotFound); err != nil { - return errors.WithStack(err) + if count, err := RemoveFinalizers(log, finalizers, getFunc, updateFunc, ignoreNotFound); err != nil { + return 0, errors.WithStack(err) + } else { + return count, nil } - return nil } // RemoveFinalizers is a helper used to remove finalizers from an object. // The functions tries to get the object using the provided get function, // then remove the given finalizers and update the update using the given update function. // In case of an update conflict, the functions tries again. -func RemoveFinalizers(log zerolog.Logger, finalizers []string, getFunc func() (metav1.Object, error), updateFunc func(metav1.Object) error, ignoreNotFound bool) error { +func RemoveFinalizers(log zerolog.Logger, finalizers []string, getFunc func() (metav1.Object, error), updateFunc func(metav1.Object) error, ignoreNotFound bool) (int, error) { attempts := 0 for { attempts++ @@ -112,15 +114,15 @@ func RemoveFinalizers(log zerolog.Logger, finalizers []string, getFunc func() (m if err != nil { if IsNotFound(err) && ignoreNotFound { // Object no longer found and we're allowed to ignore that. - return nil + return 0, nil } log.Warn().Err(err).Msg("Failed to get resource") - return errors.WithStack(err) + return 0, errors.WithStack(err) } original := obj.GetFinalizers() if len(original) == 0 { // We're done - return nil + return 0, nil } newList := make([]string, 0, len(original)) shouldRemove := func(f string) bool { @@ -136,26 +138,27 @@ func RemoveFinalizers(log zerolog.Logger, finalizers []string, getFunc func() (m newList = append(newList, f) } } - if len(newList) < len(original) { + if z := len(original) - len(newList); z > 0 { obj.SetFinalizers(newList) if err := updateFunc(obj); IsConflict(err) { if attempts > maxRemoveFinalizersAttempts { log.Warn().Err(err).Msg("Failed to update resource with fewer finalizers after many attempts") - return errors.WithStack(err) + return 0, errors.WithStack(err) } else { // Try again continue } } else if IsNotFound(err) && ignoreNotFound { // Object no longer found and we're allowed to ignore that. - return nil + return 0, nil } else if err != nil { log.Warn().Err(err).Msg("Failed to update resource with fewer finalizers") - return errors.WithStack(err) + return 0, errors.WithStack(err) } + return z, nil } else { log.Debug().Msg("No finalizers needed removal. Resource unchanged") + return 0, nil } - return nil } }