From 841af834f2428071633841922518984d0bcaf9c0 Mon Sep 17 00:00:00 2001 From: Adam Janikowski <12255597+ajanikow@users.noreply.github.com> Date: Thu, 29 Oct 2020 13:52:13 +0100 Subject: [PATCH] [Feature] Allow Volume Shrink and DBServer replacement (#652) --- pkg/apis/deployment/annotations.go | 1 + pkg/apis/deployment/v1/conditions.go | 2 + pkg/apis/deployment/v1/member_status_list.go | 6 ++ pkg/apis/deployment/v1/plan.go | 2 + pkg/apis/deployment/v1/server_group_spec.go | 11 ++- pkg/deployment/reconcile/action_add_member.go | 15 ++++ .../reconcile/action_mark_to_remove_member.go | 78 +++++++++++++++++++ pkg/deployment/reconcile/action_pvc_resize.go | 15 +++- pkg/deployment/reconcile/plan_builder.go | 7 +- .../reconcile/plan_builder_rotate_upgrade.go | 6 ++ .../reconcile/plan_builder_scale.go | 35 ++++++++- .../reconcile/plan_builder_storage.go | 8 +- 12 files changed, 178 insertions(+), 8 deletions(-) create mode 100644 pkg/deployment/reconcile/action_mark_to_remove_member.go diff --git a/pkg/apis/deployment/annotations.go b/pkg/apis/deployment/annotations.go index f47e26353..07f7df9c6 100644 --- a/pkg/apis/deployment/annotations.go +++ b/pkg/apis/deployment/annotations.go @@ -26,4 +26,5 @@ const ( ArangoDeploymentAnnotationPrefix = "deployment.arangodb.com" ArangoDeploymentPodMaintenanceAnnotation = ArangoDeploymentAnnotationPrefix + "/maintenance" ArangoDeploymentPodRotateAnnotation = ArangoDeploymentAnnotationPrefix + "/rotate" + ArangoDeploymentPodReplaceAnnotation = ArangoDeploymentAnnotationPrefix + "/replace" ) diff --git a/pkg/apis/deployment/v1/conditions.go b/pkg/apis/deployment/v1/conditions.go index 1fc8bcb3f..213340b3b 100644 --- a/pkg/apis/deployment/v1/conditions.go +++ b/pkg/apis/deployment/v1/conditions.go @@ -61,6 +61,8 @@ const ( ConditionTypeTerminating ConditionType = "Terminating" // ConditionTypeTerminating indicates that the deployment is up to date. ConditionTypeUpToDate ConditionType = "UpToDate" + // ConditionTypeMarkedToRemove indicates that the member is marked to be removed. + ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove" ) // Condition represents one current condition of a deployment or deployment member. diff --git a/pkg/apis/deployment/v1/member_status_list.go b/pkg/apis/deployment/v1/member_status_list.go index 87d8d5e29..3fe16a630 100644 --- a/pkg/apis/deployment/v1/member_status_list.go +++ b/pkg/apis/deployment/v1/member_status_list.go @@ -142,6 +142,12 @@ func (l *MemberStatusList) removeByID(id string) error { // Returns an error if the list is empty. func (l MemberStatusList) SelectMemberToRemove() (MemberStatus, error) { if len(l) > 0 { + // Try to find member with phase to be removed + for _, m := range l { + if m.Conditions.IsTrue(ConditionTypeMarkedToRemove) { + return m, nil + } + } // Try to find a not ready member for _, m := range l { if m.Phase == MemberPhaseNone { diff --git a/pkg/apis/deployment/v1/plan.go b/pkg/apis/deployment/v1/plan.go index 74b721e5a..6fea42ced 100644 --- a/pkg/apis/deployment/v1/plan.go +++ b/pkg/apis/deployment/v1/plan.go @@ -41,6 +41,8 @@ const ( ActionTypeIdle ActionType = "Idle" // ActionTypeAddMember causes a member to be added. ActionTypeAddMember ActionType = "AddMember" + // ActionTypeMarkToRemoveMember marks member to be removed. + ActionTypeMarkToRemoveMember ActionType = "MarkToRemoveMember" // ActionTypeRemoveMember causes a member to be removed. ActionTypeRemoveMember ActionType = "RemoveMember" // ActionTypeRecreateMember recreates member. Used when member is still owner of some shards. diff --git a/pkg/apis/deployment/v1/server_group_spec.go b/pkg/apis/deployment/v1/server_group_spec.go index 943e0383d..f9fa96b07 100644 --- a/pkg/apis/deployment/v1/server_group_spec.go +++ b/pkg/apis/deployment/v1/server_group_spec.go @@ -83,7 +83,8 @@ type ServerGroupSpec struct { // VolumeClaimTemplate specifies a template for volume claims VolumeClaimTemplate *core.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"` // VolumeResizeMode specified resize mode for pvc - VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"` + VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"` + VolumeAllowShrink *bool `json:"volumeAllowShrink,omitempty"` // AntiAffinity specified additional antiAffinity settings in ArangoDB Pod definitions AntiAffinity *core.PodAntiAffinity `json:"antiAffinity,omitempty"` // Affinity specified additional affinity settings in ArangoDB Pod definitions @@ -614,3 +615,11 @@ func (s ServerGroupSpec) ResetImmutableFields(group ServerGroup, fieldPrefix str } return resetFields } + +func (s ServerGroupSpec) GetVolumeAllowShrink() bool { + if s.VolumeAllowShrink == nil { + return false // Default value + } + + return *s.VolumeAllowShrink +} diff --git a/pkg/deployment/reconcile/action_add_member.go b/pkg/deployment/reconcile/action_add_member.go index f85211fa1..806dead52 100644 --- a/pkg/deployment/reconcile/action_add_member.go +++ b/pkg/deployment/reconcile/action_add_member.go @@ -65,5 +65,20 @@ func (a *actionAddMember) Start(ctx context.Context) (bool, error) { return false, maskAny(err) } a.newMemberID = newID + + if _, ok := a.action.Params[api.ActionTypeWaitForMemberUp.String()]; ok { + a.actionCtx.WithStatusUpdate(func(s *api.DeploymentStatus) bool { + s.Plan = append(s.Plan, api.NewAction(api.ActionTypeWaitForMemberInSync, a.action.Group, newID, "Wait for member in sync after creation")) + return true + }) + } + + if _, ok := a.action.Params[api.ActionTypeWaitForMemberInSync.String()]; ok { + a.actionCtx.WithStatusUpdate(func(s *api.DeploymentStatus) bool { + s.Plan = append(s.Plan, api.NewAction(api.ActionTypeWaitForMemberInSync, a.action.Group, newID, "Wait for member in sync after creation")) + return true + }) + } + return true, nil } diff --git a/pkg/deployment/reconcile/action_mark_to_remove_member.go b/pkg/deployment/reconcile/action_mark_to_remove_member.go new file mode 100644 index 000000000..f02a6808a --- /dev/null +++ b/pkg/deployment/reconcile/action_mark_to_remove_member.go @@ -0,0 +1,78 @@ +// +// DISCLAIMER +// +// Copyright 2020 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package reconcile + +import ( + "context" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/rs/zerolog" +) + +func init() { + registerAction(api.ActionTypeMarkToRemoveMember, newMarkToRemoveMemberAction) +} + +func newMarkToRemoveMemberAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action { + a := &actionMarkToRemove{} + + a.actionImpl = newActionImplDefRef(log, action, actionCtx, addMemberTimeout) + + return a +} + +type actionMarkToRemove struct { + // actionImpl implement timeout and member id functions + actionImpl + + // actionEmptyCheckProgress implement check progress with empty implementation + actionEmptyCheckProgress +} + +func (a *actionMarkToRemove) Start(ctx context.Context) (bool, error) { + if a.action.Group != api.ServerGroupDBServers { + return true, nil + } + + return true, a.actionCtx.WithStatusUpdate(func(s *api.DeploymentStatus) bool { + member, group, ok := s.Members.ElementByID(a.action.MemberID) + if !ok { + return false + } + + if group != a.action.Group { + return false + } + + if !member.Conditions.Update(api.ConditionTypeMarkedToRemove, true, "Member marked to be removed", "") { + return false + } + + if err := s.Members.Update(member, group); err != nil { + a.log.Warn().Err(err).Str("Member", member.ID).Msgf("Unable to update member") + return false + } + + return true + }) +} diff --git a/pkg/deployment/reconcile/action_pvc_resize.go b/pkg/deployment/reconcile/action_pvc_resize.go index 649c0ec86..41b10fb37 100644 --- a/pkg/deployment/reconcile/action_pvc_resize.go +++ b/pkg/deployment/reconcile/action_pvc_resize.go @@ -98,9 +98,18 @@ func (a *actionPVCResize) Start(ctx context.Context) (bool, error) { return false, nil } else if cmp > 0 { - log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()). - Msg("Volume size should not shrink") - a.actionCtx.CreateEvent(k8sutil.NewCannotShrinkVolumeEvent(a.actionCtx.GetAPIObject(), pvc.Name)) + if groupSpec.GetVolumeAllowShrink() && group == api.ServerGroupDBServers { + if err := a.actionCtx.WithStatusUpdate(func(s *api.DeploymentStatus) bool { + s.Plan = append(s.Plan, api.NewAction(api.ActionTypeMarkToRemoveMember, group, m.ID)) + return true + }); err != nil { + log.Error().Err(err).Msg("Unable to mark instance to be replaced") + } + } else { + log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()). + Msg("Volume size should not shrink") + a.actionCtx.CreateEvent(k8sutil.NewCannotShrinkVolumeEvent(a.actionCtx.GetAPIObject(), pvc.Name)) + } return false, nil } } diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index 3b6fc18de..7f3c909d7 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -214,7 +214,12 @@ func createPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIOb // Check for scale up/down if plan.IsEmpty() { - plan = pb.Apply(createScaleMemeberPlan) + plan = pb.Apply(createScaleMemberPlan) + } + + // Check for members to be removed + if plan.IsEmpty() { + plan = pb.Apply(createReplaceMemberPlan) } // Check for the need to rotate one or more members diff --git a/pkg/deployment/reconcile/plan_builder_rotate_upgrade.go b/pkg/deployment/reconcile/plan_builder_rotate_upgrade.go index 39f99a8a5..9b8d7ccac 100644 --- a/pkg/deployment/reconcile/plan_builder_rotate_upgrade.go +++ b/pkg/deployment/reconcile/plan_builder_rotate_upgrade.go @@ -144,8 +144,14 @@ func createRotateOrUpgradePlanInternal(log zerolog.Logger, apiObject k8sutil.API } if pod.Annotations != nil { + if _, ok := pod.Annotations[deployment.ArangoDeploymentPodReplaceAnnotation]; ok && group == api.ServerGroupDBServers { + newPlan = api.Plan{api.NewAction(api.ActionTypeMarkToRemoveMember, group, m.ID, "Replace flag present")} + continue + } + if _, ok := pod.Annotations[deployment.ArangoDeploymentPodRotateAnnotation]; ok { newPlan = createRotateMemberPlan(log, m, group, "Rotation flag present") + continue } } } diff --git a/pkg/deployment/reconcile/plan_builder_scale.go b/pkg/deployment/reconcile/plan_builder_scale.go index 9fdb4747c..14f789e94 100644 --- a/pkg/deployment/reconcile/plan_builder_scale.go +++ b/pkg/deployment/reconcile/plan_builder_scale.go @@ -31,7 +31,7 @@ import ( "github.com/rs/zerolog" ) -func createScaleMemeberPlan(ctx context.Context, +func createScaleMemberPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIObject, spec api.DeploymentSpec, status api.DeploymentStatus, cachedStatus inspector.Inspector, context PlanBuilderContext) api.Plan { @@ -102,3 +102,36 @@ func createScalePlan(log zerolog.Logger, members api.MemberStatusList, group api } return plan } + +func createReplaceMemberPlan(ctx context.Context, + log zerolog.Logger, apiObject k8sutil.APIObject, + spec api.DeploymentSpec, status api.DeploymentStatus, + cachedStatus inspector.Inspector, context PlanBuilderContext) api.Plan { + + var plan api.Plan + + // Replace is only allowed for DBServers + switch spec.GetMode() { + case api.DeploymentModeCluster: + status.Members.ForeachServerInGroups(func(group api.ServerGroup, list api.MemberStatusList) error { + for _, member := range list { + if !plan.IsEmpty() { + return nil + } + if member.Conditions.IsTrue(api.ConditionTypeMarkedToRemove) { + plan = append(plan, api.NewAction(api.ActionTypeAddMember, group, ""). + AddParam(api.ActionTypeWaitForMemberInSync.String(), ""). + AddParam(api.ActionTypeWaitForMemberUp.String(), "")) + log.Debug(). + Str("role", group.AsRole()). + Msg("Creating replacement plan") + return nil + } + } + + return nil + }, api.ServerGroupDBServers) + } + + return plan +} diff --git a/pkg/deployment/reconcile/plan_builder_storage.go b/pkg/deployment/reconcile/plan_builder_storage.go index 5869ca0a3..17a54c260 100644 --- a/pkg/deployment/reconcile/plan_builder_storage.go +++ b/pkg/deployment/reconcile/plan_builder_storage.go @@ -115,8 +115,12 @@ func createRotateServerStoragePlan(ctx context.Context, if cmp < 0 { plan = append(plan, pvcResizePlan(log, group, groupSpec, m.ID)...) } else if cmp > 0 { - log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()). - Msg("Volume size should not shrink") + if groupSpec.GetVolumeAllowShrink() && group == api.ServerGroupDBServers { + plan = append(plan, api.NewAction(api.ActionTypeMarkToRemoveMember, group, m.ID)) + } else { + log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()). + Msg("Volume size should not shrink") + } } } }