diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b441aaef..ddd043c59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Change Log ## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A) +- Added extended Rotation check for Cluster mode - Removed old rotation logic (rotation of ArangoDeployment may be enforced after Operator upgrade) - Added UpToDate condition in ArangoDeployment Status diff --git a/pkg/apis/deployment/v1/deployment_mode.go b/pkg/apis/deployment/v1/deployment_mode.go index f8f52ba55..b9249c2f7 100644 --- a/pkg/apis/deployment/v1/deployment_mode.go +++ b/pkg/apis/deployment/v1/deployment_mode.go @@ -49,6 +49,15 @@ func (m DeploymentMode) Validate() error { } } +// Get returns the mode, or DeploymentModeCluster when the receiver is nil. +func (m *DeploymentMode) Get() DeploymentMode { + if m == nil { + return DeploymentModeCluster + } + + return *m +} + // HasSingleServers returns true when the given mode is "Single" or "ActiveFailover". func (m DeploymentMode) HasSingleServers() bool { return m == DeploymentModeSingle || m == DeploymentModeActiveFailover diff --git a/pkg/apis/deployment/v1/plan.go b/pkg/apis/deployment/v1/plan.go index 5eb3dbd2b..28ebfd364 100644 --- a/pkg/apis/deployment/v1/plan.go +++ b/pkg/apis/deployment/v1/plan.go @@ -53,6 +53,8 @@ const ( ActionTypeUpgradeMember ActionType = "UpgradeMember" // ActionTypeWaitForMemberUp causes the plan to wait until the member is considered "up". ActionTypeWaitForMemberUp ActionType = "WaitForMemberUp" + // ActionTypeWaitForMemberInSync causes the plan to wait until all shards are in sync (Cluster mode with the extended rotation check enabled). + ActionTypeWaitForMemberInSync ActionType = "WaitForMemberInSync" // ActionTypeRenewTLSCertificate causes the TLS certificate of a member to be renewed. ActionTypeRenewTLSCertificate ActionType = "RenewTLSCertificate" // ActionTypeRenewTLSCACertificate causes the TLS CA certificate of the entire deployment to be renewed. 
diff --git a/pkg/apis/deployment/v1/server_group_spec.go b/pkg/apis/deployment/v1/server_group_spec.go index 5c793d3b7..4f1265688 100644 --- a/pkg/apis/deployment/v1/server_group_spec.go +++ b/pkg/apis/deployment/v1/server_group_spec.go @@ -84,6 +84,8 @@ type ServerGroupSpec struct { Volumes ServerGroupSpecVolumes `json:"volumes,omitempty"` // VolumeMounts define list of volume mounts mounted into server container VolumeMounts ServerGroupSpecVolumeMounts `json:"volumeMounts,omitempty"` + // ExtendedRotationCheck enables the extended rotation check for this group (treated as disabled when unset) + ExtendedRotationCheck *bool `json:"extendedRotationCheck,omitempty"` } // ServerGroupSpecSecurityContext contains specification for pod security context diff --git a/pkg/deployment/reconcile/action_context.go b/pkg/deployment/reconcile/action_context.go index a29047ab1..ed4f2086e 100644 --- a/pkg/deployment/reconcile/action_context.go +++ b/pkg/deployment/reconcile/action_context.go @@ -101,6 +101,8 @@ type ActionContext interface { SetCurrentImage(imageInfo api.ImageInfo) error // GetDeploymentHealth returns a copy of the latest known state of cluster health GetDeploymentHealth() (driver.ClusterHealth, error) + // GetShardSyncStatus returns true if all shards are in sync + GetShardSyncStatus() bool // InvalidateSyncStatus resets the sync state to false and triggers an inspection InvalidateSyncStatus() // GetSpec returns a copy of the spec @@ -127,6 +129,10 @@ type actionContext struct { context Context } +func (ac *actionContext) GetShardSyncStatus() bool { + return ac.context.GetShardSyncStatus() +} + func (ac *actionContext) UpdateClusterCondition(conditionType api.ConditionType, status bool, reason, message string) error { return ac.context.WithStatusUpdate(func(s *api.DeploymentStatus) bool { return s.Conditions.Update(conditionType, status, reason, message) diff --git a/pkg/deployment/reconcile/action_wait_for_member_in_sync.go b/pkg/deployment/reconcile/action_wait_for_member_in_sync.go new file mode 100644 index 
000000000..dd7afcff9 --- /dev/null +++ b/pkg/deployment/reconcile/action_wait_for_member_in_sync.go @@ -0,0 +1,98 @@ +// +// DISCLAIMER +// +// Copyright 2020 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Adam Janikowski +// + +package reconcile + +import ( + "context" + + "github.com/arangodb/kube-arangodb/pkg/util" + + "github.com/rs/zerolog" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" +) + +func init() { + registerAction(api.ActionTypeWaitForMemberInSync, newWaitForMemberInSync) +} + +// newWaitForMemberInSync creates a new Action that implements the given +// planned WaitForMemberInSync action. +func newWaitForMemberInSync(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action { + a := &actionWaitForMemberInSync{} + + a.actionImpl = newActionImplDefRef(log, action, actionCtx, waitForMemberUpTimeout) + + return a +} + +// actionWaitForMemberInSync implements a WaitForMemberInSync action. +type actionWaitForMemberInSync struct { + // actionImpl implement timeout and member id functions + actionImpl +} + +// Start performs the start of the action. +// Returns true if the action is completely finished, false in case +// the start time needs to be recorded and a ready condition needs to be checked. 
+func (a *actionWaitForMemberInSync) Start(ctx context.Context) (bool, error) { + ready, _, err := a.CheckProgress(ctx) + return ready, err +} + +// CheckProgress checks the progress of the action. +// The first result is true if the action is completely finished; the second result is always false here. +func (a *actionWaitForMemberInSync) CheckProgress(ctx context.Context) (bool, bool, error) { + ready, err := a.check(ctx) + if err != nil { + return false, false, err + } + + return ready, false, nil +} + +func (a *actionWaitForMemberInSync) check(ctx context.Context) (bool, error) { + spec := a.actionCtx.GetSpec() + + groupSpec := spec.GetServerGroupSpec(a.action.Group) + + if !util.BoolOrDefault(groupSpec.ExtendedRotationCheck, false) { + return true, nil + } + + switch spec.Mode.Get() { + case api.DeploymentModeCluster: + return a.checkCluster(ctx, spec, groupSpec) + default: + return true, nil + } +} + +func (a *actionWaitForMemberInSync) checkCluster(ctx context.Context, spec api.DeploymentSpec, groupSpec api.ServerGroupSpec) (bool, error) { + if !a.actionCtx.GetShardSyncStatus() { + a.log.Info().Str("mode", "cluster").Msgf("Shards are not in sync") + return false, nil + } + + return true, nil +} diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index 50bf34ba1..aca1bfe97 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -234,6 +234,7 @@ func createRotateMemberPlan(log zerolog.Logger, member api.MemberStatus, plan := api.Plan{ api.NewAction(api.ActionTypeRotateMember, group, member.ID, reason), api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID), + api.NewAction(api.ActionTypeWaitForMemberInSync, group, member.ID), } return plan } diff --git a/pkg/deployment/reconcile/plan_builder_test.go b/pkg/deployment/reconcile/plan_builder_test.go index fd2abd0db..da7905fed 100644 --- a/pkg/deployment/reconcile/plan_builder_test.go +++ b/pkg/deployment/reconcile/plan_builder_test.go @@ -682,6 
+682,7 @@ func TestCreatePlan(t *testing.T) { ExpectedPlan: []api.Action{ api.NewAction(api.ActionTypeRotateMember, api.ServerGroupAgents, ""), api.NewAction(api.ActionTypeWaitForMemberUp, api.ServerGroupAgents, ""), + api.NewAction(api.ActionTypeWaitForMemberInSync, api.ServerGroupAgents, ""), }, ExpectedLog: "Creating rotation plan", },