diff --git a/CHANGELOG.md b/CHANGELOG.md index 37fac17cc..2220e4aeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - (Bugfix) Disable member removal in case of health failure - (Bugfix) Reorder Topology management plan steps - (Feature) UpdateInProgress & UpgradeInProgress Conditions +- (Bugfix) Fix Maintenance switch and HotBackup race ## [1.2.9](https://github.com/arangodb/kube-arangodb/tree/1.2.9) (2022-03-30) - (Feature) Improve Kubernetes clientsets management diff --git a/pkg/deployment/agency/state.go b/pkg/deployment/agency/state.go index aa74f267c..70657738e 100644 --- a/pkg/deployment/agency/state.go +++ b/pkg/deployment/agency/state.go @@ -27,7 +27,6 @@ import ( "github.com/arangodb/go-driver" "github.com/arangodb/go-driver/agency" - "github.com/arangodb/kube-arangodb/pkg/util" "github.com/arangodb/kube-arangodb/pkg/util/errors" ) @@ -111,35 +110,7 @@ type StatePlan struct { } type StateSupervision struct { - Maintenance StateExists `json:"Maintenance,omitempty"` -} - -type StateExists []byte - -func (d StateExists) Hash() string { - if d == nil { - return "" - } - - return util.SHA256(d) -} - -func (d StateExists) Exists() bool { - return d != nil -} - -func (d *StateExists) UnmarshalJSON(bytes []byte) error { - if bytes == nil { - *d = nil - return nil - } - - z := make([]byte, len(bytes)) - - copy(z, bytes) - - *d = z - return nil + Maintenance StateTimestamp `json:"Maintenance,omitempty"` } func (s State) CountShards() int { diff --git a/pkg/deployment/agency/state_exists.go b/pkg/deployment/agency/state_exists.go new file mode 100644 index 000000000..c87a83373 --- /dev/null +++ b/pkg/deployment/agency/state_exists.go @@ -0,0 +1,51 @@ +// +// DISCLAIMER +// +// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package agency + +import "github.com/arangodb/kube-arangodb/pkg/util" + +type StateExists []byte + +func (d StateExists) Hash() string { + if d == nil { + return "" + } + + return util.SHA256(d) +} + +func (d StateExists) Exists() bool { + return d != nil +} + +func (d *StateExists) UnmarshalJSON(bytes []byte) error { + if bytes == nil { + *d = nil + return nil + } + + z := make([]byte, len(bytes)) + + copy(z, bytes) + + *d = z + return nil +} diff --git a/pkg/deployment/agency/state_timestamp.go b/pkg/deployment/agency/state_timestamp.go new file mode 100644 index 000000000..49f61b3d9 --- /dev/null +++ b/pkg/deployment/agency/state_timestamp.go @@ -0,0 +1,101 @@ +// +// DISCLAIMER +// +// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package agency + +import ( + "encoding/json" + "time" + + "github.com/arangodb/kube-arangodb/pkg/util" + "github.com/arangodb/kube-arangodb/pkg/util/errors" +) + +type StateTimestamp struct { + hash string + data string + time time.Time + parsed bool + exists bool +} + +func (s *StateTimestamp) Hash() string { + if s == nil { + return util.SHA256FromString("") + } + + return s.hash +} + +func (s *StateTimestamp) Exists() bool { + if s == nil { + return false + } + + return s.exists +} + +func (s *StateTimestamp) Time() (time.Time, bool) { + if s == nil { + return time.Time{}, false + } + + if !s.parsed || !s.exists { + return time.Time{}, false + } + + return s.time, true +} + +func (s *StateTimestamp) UnmarshalJSON(bytes []byte) error { + if s == nil { + return errors.Newf("Object is nil") + } + + var t string + + if err := json.Unmarshal(bytes, &t); err != nil { + return err + } + + *s = unmarshalJSONStateTimestamp(t) + + return nil +} + +func unmarshalJSONStateTimestamp(s string) StateTimestamp { + var ts = StateTimestamp{ + hash: util.SHA256([]byte(s)), + data: s, + exists: true, + } + + t, ok := util.ParseAgencyTime(s) + if !ok { + ts.parsed = false + return ts + } + + ts.time = t + ts.parsed = true + ts.hash = util.SHA256FromString(s) + + return ts +} diff --git a/pkg/deployment/agency/target.go b/pkg/deployment/agency/target.go index 89b291565..d0dfc768d 100644 --- a/pkg/deployment/agency/target.go +++ b/pkg/deployment/agency/target.go @@ -25,5 +25,5 @@ type StateTarget struct { } type StateTargetHotBackup struct { - Create StateExists `json:"Create,omitempty"` + Create StateTimestamp `json:"Create,omitempty"` } diff --git a/pkg/deployment/agency/target_test.go b/pkg/deployment/agency/target_test.go index 66f0a7cbf..6e2d3daf7 100644 --- a/pkg/deployment/agency/target_test.go +++ b/pkg/deployment/agency/target_test.go @@ -33,6 +33,8 @@ func Test_Target_HotBackup(t *testing.T) { require.NoError(t, json.Unmarshal(agencyDump39HotBackup, &s)) require.True(t, s.Agency.Arango.Target.HotBackup.Create.Exists()) + + t.Log(s.Agency.Arango.Target.HotBackup.Create.time.String()) }) t.Run("Does Not Exists", func(t *testing.T) { var s DumpState diff --git a/pkg/deployment/deployment_inspector.go b/pkg/deployment/deployment_inspector.go index a5e678adb..797732702 100644 --- a/pkg/deployment/deployment_inspector.go +++ b/pkg/deployment/deployment_inspector.go @@ -404,23 +404,33 @@ func (d *Deployment) refreshMaintenanceTTL(ctx context.Context) { return } - condition, ok := d.status.last.Conditions.Get(api.ConditionTypeMaintenanceMode) + agencyState, agencyOK := d.GetAgencyCache() + if !agencyOK { + return + } + + condition, ok := d.status.last.Conditions.Get(api.ConditionTypeMaintenance) + maintenance := agencyState.Supervision.Maintenance if !ok || !condition.IsTrue() { return } // Check GracePeriod - if condition.LastUpdateTime.Add(d.apiObject.Spec.Timeouts.GetMaintenanceGracePeriod()).Before(time.Now()) { - if err := d.SetAgencyMaintenanceMode(ctx, true); err != nil { - return + if t, ok := maintenance.Time(); ok { + if time.Until(t) < d.apiObject.Spec.Timeouts.GetMaintenanceGracePeriod() { + if err := d.SetAgencyMaintenanceMode(ctx, true); err != nil { + return + } + d.deps.Log.Info().Msgf("Refreshed maintenance lock") } - if err := d.WithStatusUpdate(ctx, func(s *api.DeploymentStatus) bool { - return s.Conditions.Touch(api.ConditionTypeMaintenanceMode) - }); err != nil { - return + } else { + if condition.LastUpdateTime.Add(d.apiObject.Spec.Timeouts.GetMaintenanceGracePeriod()).Before(time.Now()) { + if err := d.SetAgencyMaintenanceMode(ctx, true); err != nil { + return + } + d.deps.Log.Info().Msgf("Refreshed maintenance lock") } - d.deps.Log.Info().Msgf("Refreshed maintenance lock") } } diff --git a/pkg/deployment/reconcile/action_maintenance_condition.go b/pkg/deployment/reconcile/action_maintenance_condition.go index 0b706a0f8..5e5f72c27 100644 --- a/pkg/deployment/reconcile/action_maintenance_condition.go +++ b/pkg/deployment/reconcile/action_maintenance_condition.go @@ -21,8 +21,6 @@ package reconcile import ( - "context" - api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" "github.com/rs/zerolog" ) @@ -34,7 +32,7 @@ func init() { func newSetMaintenanceConditionAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action { a := &actionSetMaintenanceCondition{} - a.actionImpl = newActionImpl(log, action, actionCtx, &a.newMemberID) + a.actionImpl = newActionImplDefRef(log, action, actionCtx) return a } @@ -43,33 +41,5 @@ type actionSetMaintenanceCondition struct { // actionImpl implement timeout and member id functions actionImpl - actionEmptyCheckProgress - - newMemberID string -} - -func (a *actionSetMaintenanceCondition) Start(ctx context.Context) (bool, error) { - switch a.actionCtx.GetMode() { - case api.DeploymentModeSingle: - return true, nil - } - - agencyState, agencyOK := a.actionCtx.GetAgencyCache() - if !agencyOK { - a.log.Error().Msgf("Unable to determine maintenance condition") - } else { - - if err := a.actionCtx.WithStatusUpdate(ctx, func(s *api.DeploymentStatus) bool { - if agencyState.Supervision.Maintenance.Exists() { - return s.Conditions.Update(api.ConditionTypeMaintenanceMode, true, "Maintenance", "Maintenance enabled") - } else { - return s.Conditions.Remove(api.ConditionTypeMaintenanceMode) - } - }); err != nil { - a.log.Error().Err(err).Msgf("Unable to set maintenance condition") - return true, nil - } - } - - return true, nil + actionEmpty } diff --git a/pkg/deployment/reconcile/helper_wrap.go b/pkg/deployment/reconcile/helper_wrap.go index 0db9289b7..4879349d2 100644 --- a/pkg/deployment/reconcile/helper_wrap.go +++ b/pkg/deployment/reconcile/helper_wrap.go @@ -39,8 +39,7 @@ func withMaintenanceStart(plan ...api.Action) api.Plan { } return api.AsPlan(plan).Before( - actions.NewClusterAction(api.ActionTypeEnableMaintenance, "Enable maintenance before actions"), - actions.NewClusterAction(api.ActionTypeSetMaintenanceCondition, "Enable maintenance before actions")) + actions.NewClusterAction(api.ActionTypeEnableMaintenance, "Enable maintenance before actions")) } func withResignLeadership(group api.ServerGroup, member api.MemberStatus, reason string, plan ...api.Action) api.Plan { diff --git a/pkg/deployment/reconcile/plan_builder_common.go b/pkg/deployment/reconcile/plan_builder_common.go index 76d1d5ad0..795f118aa 100644 --- a/pkg/deployment/reconcile/plan_builder_common.go +++ b/pkg/deployment/reconcile/plan_builder_common.go @@ -23,6 +23,8 @@ package reconcile import ( "context" + "time" + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" "github.com/arangodb/kube-arangodb/pkg/deployment/actions" "github.com/arangodb/kube-arangodb/pkg/deployment/features" @@ -31,6 +33,27 @@ import ( "github.com/rs/zerolog" ) +var ( + ObsoleteClusterConditions = []api.ConditionType{ + api.ConditionTypeMaintenanceMode, + } +) + +func cleanupConditions(ctx context.Context, + log zerolog.Logger, apiObject k8sutil.APIObject, + spec api.DeploymentSpec, status api.DeploymentStatus, + cachedStatus inspectorInterface.Inspector, planCtx PlanBuilderContext) api.Plan { + var p api.Plan + + for _, c := range ObsoleteClusterConditions { + if _, ok := status.Conditions.Get(c); ok { + p = append(p, removeConditionActionV2("Cleanup Condition", c)) + } + } + + return p +} + func createMaintenanceManagementPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIObject, spec api.DeploymentSpec, status api.DeploymentStatus, @@ -50,22 +73,37 @@ func createMaintenanceManagementPlan(ctx context.Context, return nil } + if agencyState.Target.HotBackup.Create.Exists() { + log.Info().Msgf("HotBackup in progress") + return nil + } + enabled := agencyState.Supervision.Maintenance.Exists() + c, cok := status.Conditions.Get(api.ConditionTypeMaintenance) + + if (cok && c.IsTrue()) != enabled { + // Condition not yet propagated + log.Info().Msgf("Condition not yet propagated") + return nil + } + + if cok { + if t := c.LastTransitionTime.Time; !t.IsZero() { + if time.Since(t) < 5*time.Second { + // Did not elapse 5 s + return nil + } + } + } if !enabled && spec.Database.GetMaintenance() { log.Info().Msgf("Enabling maintenance mode") - return api.Plan{actions.NewClusterAction(api.ActionTypeEnableMaintenance), actions.NewClusterAction(api.ActionTypeSetMaintenanceCondition)} + return api.Plan{actions.NewClusterAction(api.ActionTypeEnableMaintenance)} } if enabled && !spec.Database.GetMaintenance() { log.Info().Msgf("Disabling maintenance mode") - return api.Plan{actions.NewClusterAction(api.ActionTypeDisableMaintenance), actions.NewClusterAction(api.ActionTypeSetMaintenanceCondition)} - } - - condition, ok := status.Conditions.Get(api.ConditionTypeMaintenanceMode) - - if enabled != (ok && condition.IsTrue()) { - return api.Plan{actions.NewClusterAction(api.ActionTypeSetMaintenanceCondition)} + return api.Plan{actions.NewClusterAction(api.ActionTypeDisableMaintenance)} } return nil diff --git a/pkg/deployment/reconcile/plan_builder_high.go b/pkg/deployment/reconcile/plan_builder_high.go index 4d9da85dd..fd7f92a53 100644 --- a/pkg/deployment/reconcile/plan_builder_high.go +++ b/pkg/deployment/reconcile/plan_builder_high.go @@ -60,7 +60,8 @@ func createHighPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil.A ApplyIfEmpty(createRebalancerCheckPlan). ApplyWithBackOff(BackOffCheck, time.Minute, emptyPlanBuilder)). Apply(createBackupInProgressConditionPlan). // Discover backups always - Apply(createMaintenanceConditionPlan) // Discover maintenance always + Apply(createMaintenanceConditionPlan). // Discover maintenance always + Apply(cleanupConditions) // Cleanup Conditions return r.Plan(), r.BackOff(), true } diff --git a/pkg/util/times.go b/pkg/util/times.go index 471648513..a48c27b8a 100644 --- a/pkg/util/times.go +++ b/pkg/util/times.go @@ -23,6 +23,8 @@ package util import ( "math" + "time" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -41,3 +43,25 @@ func TimeCompareEqualPointer(a, b *metav1.Time) bool { return TimeCompareEqual(*a, *b) } + +func TimeAgencyLayouts() []string { + return []string{ + time.RFC3339, + } +} + +func ParseAgencyTime(s string) (time.Time, bool) { + t, id := ParseTime(s, TimeAgencyLayouts()...) + + return t, id != -1 +} + +func ParseTime(s string, layouts ...string) (time.Time, int) { + for id, layout := range layouts { + if t, err := time.Parse(layout, s); err == nil { + return t, id + } + } + + return time.Time{}, -1 +}