diff --git a/CHANGELOG.md b/CHANGELOG.md index eb9e72c4d..f41bb3b83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A) - Do not check License V2 on Community images - Add status.members.. +- Don't replace pod immediately when storage class changes - Define MemberReplacementRequired condition - Remove pod immediately when annotation is turned on - (ARM64) Add support for ARM64 enablement diff --git a/chart/kube-arangodb/templates/deployment.yaml b/chart/kube-arangodb/templates/deployment.yaml index ff09c9324..3fdd869ee 100644 --- a/chart/kube-arangodb/templates/deployment.yaml +++ b/chart/kube-arangodb/templates/deployment.yaml @@ -105,6 +105,9 @@ spec: {{ if .Values.operator.features.backup }} - --operator.backup {{- end }} +{{- if .Values.operator.debug }} + - --mode.single +{{- end }} {{ if .Values.operator.features.apps }} - --operator.apps {{- end }} diff --git a/pkg/apis/deployment/v1/server_group_spec.go b/pkg/apis/deployment/v1/server_group_spec.go index 23376df78..c6791d5f4 100644 --- a/pkg/apis/deployment/v1/server_group_spec.go +++ b/pkg/apis/deployment/v1/server_group_spec.go @@ -116,8 +116,9 @@ type ServerGroupSpec struct { // VolumeClaimTemplate specifies a template for volume claims VolumeClaimTemplate *core.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"` // VolumeResizeMode specified resize mode for pvc - VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"` - VolumeAllowShrink *bool `json:"volumeAllowShrink,omitempty"` + VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"` + // Deprecated: VolumeAllowShrink allows shrink the volume + VolumeAllowShrink *bool `json:"volumeAllowShrink,omitempty"` // AntiAffinity specified additional antiAffinity settings in ArangoDB Pod definitions AntiAffinity *core.PodAntiAffinity `json:"antiAffinity,omitempty"` // Affinity specified additional affinity settings in ArangoDB Pod definitions @@ -682,6 +683,7 @@ func (s ServerGroupSpec) ResetImmutableFields(group ServerGroup, fieldPrefix str return resetFields } +// Deprecated: GetVolumeAllowShrink returns true when it is possible to shrink the volume. func (s ServerGroupSpec) GetVolumeAllowShrink() bool { if s.VolumeAllowShrink == nil { return false // Default value diff --git a/pkg/apis/deployment/v2alpha1/server_group_spec.go b/pkg/apis/deployment/v2alpha1/server_group_spec.go index 02f22e65e..e1af3e143 100644 --- a/pkg/apis/deployment/v2alpha1/server_group_spec.go +++ b/pkg/apis/deployment/v2alpha1/server_group_spec.go @@ -116,8 +116,9 @@ type ServerGroupSpec struct { // VolumeClaimTemplate specifies a template for volume claims VolumeClaimTemplate *core.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"` // VolumeResizeMode specified resize mode for pvc - VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"` - VolumeAllowShrink *bool `json:"volumeAllowShrink,omitempty"` + VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"` + // Deprecated: VolumeAllowShrink allows shrink the volume + VolumeAllowShrink *bool `json:"volumeAllowShrink,omitempty"` // AntiAffinity specified additional antiAffinity settings in ArangoDB Pod definitions AntiAffinity *core.PodAntiAffinity `json:"antiAffinity,omitempty"` // Affinity specified additional affinity settings in ArangoDB Pod definitions @@ -682,6 +683,7 @@ func (s ServerGroupSpec) ResetImmutableFields(group ServerGroup, fieldPrefix str return resetFields } +// Deprecated: GetVolumeAllowShrink returns true when it is possible to shrink the volume. func (s ServerGroupSpec) GetVolumeAllowShrink() bool { if s.VolumeAllowShrink == nil { return false // Default value diff --git a/pkg/deployment/member/phase_updates.go b/pkg/deployment/member/phase_updates.go index 68d4ff3b1..ba164281c 100644 --- a/pkg/deployment/member/phase_updates.go +++ b/pkg/deployment/member/phase_updates.go @@ -86,6 +86,7 @@ func removeMemberConditionsMapFunc(m *api.MemberStatus) { m.Conditions.Remove(api.ConditionTypeUpdateFailed) m.Conditions.Remove(api.ConditionTypeCleanedOut) m.Conditions.Remove(api.ConditionTypeTopologyAware) + m.Conditions.Remove(api.MemberReplacementRequired) m.RemoveTerminationsBefore(time.Now().Add(-1 * recentTerminationsKeepPeriod)) diff --git a/pkg/deployment/reconcile/action_set_member_condition_v2.go b/pkg/deployment/reconcile/action_set_member_condition_v2.go index bd8bfbdc6..24e9680e8 100644 --- a/pkg/deployment/reconcile/action_set_member_condition_v2.go +++ b/pkg/deployment/reconcile/action_set_member_condition_v2.go @@ -70,26 +70,44 @@ func (a actionSetMemberConditionV2) Start(ctx context.Context) (bool, error) { as := a.action.Params[setConditionActionV2KeyStatus] == string(core.ConditionTrue) if err := a.actionCtx.WithStatusUpdateErr(ctx, func(s *api.DeploymentStatus) (bool, error) { - m, _, ok := s.Members.ElementByID(a.action.MemberID) - if !ok { - a.log.Info().Msg("can not set the condition because the member is gone already") - return false, nil - } + var changed bool - return m.Conditions.UpdateWithHash(api.ConditionType(aa), as, ar, am, ah), nil + s.Members.ForServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { + for i := range members { + if members[i].ID == a.action.MemberID { + changed = members[i].Conditions.UpdateWithHash(api.ConditionType(aa), as, ar, am, ah) + return nil + } + } + + a.log.Info().Msg("can not set the condition because the member is gone already") + return nil + }, a.action.Group) + + // If not found then false is returned. + return changed, nil }); err != nil { a.log.Warn().Err(err).Msgf("unable to update status") return true, nil } case setConditionActionV2KeyTypeRemove: if err := a.actionCtx.WithStatusUpdateErr(ctx, func(s *api.DeploymentStatus) (bool, error) { - m, _, ok := s.Members.ElementByID(a.action.MemberID) - if !ok { - a.log.Info().Msg("can not set the condition because the member is gone already") - return false, nil - } + var changed bool - return m.Conditions.Remove(api.ConditionType(aa)), nil + s.Members.ForServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { + for i := range members { + if members[i].ID == a.action.MemberID { + changed = members[i].Conditions.Remove(api.ConditionType(aa)) + return nil + } + } + + a.log.Info().Msg("can not remove the condition because the member is gone already") + return nil + }, a.action.Group) + + // If not found then false is returned. + return changed, nil }); err != nil { a.log.Warn().Err(err).Msgf("unable to update status") return true, nil diff --git a/pkg/deployment/reconcile/condition_member_recreation.go b/pkg/deployment/reconcile/condition_member_recreation.go index 6944e3f99..5de224f20 100644 --- a/pkg/deployment/reconcile/condition_member_recreation.go +++ b/pkg/deployment/reconcile/condition_member_recreation.go @@ -22,12 +22,18 @@ package reconcile import ( "context" + "fmt" "strings" + "github.com/rs/zerolog" + core "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + + "github.com/arangodb/kube-arangodb/pkg/apis/deployment" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/arangodb/kube-arangodb/pkg/util" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector" - "github.com/rs/zerolog" ) func createMemberRecreationConditionsPlan(ctx context.Context, @@ -37,7 +43,8 @@ func createMemberRecreationConditionsPlan(ctx context.Context, var p api.Plan for _, m := range status.Members.AsList() { - resp, recreate := EvaluateMemberRecreationCondition(ctx, log, apiObject, spec, status, m.Group, m.Member, cachedStatus, context) + message, recreate := EvaluateMemberRecreationCondition(ctx, log, apiObject, spec, status, m.Group, m.Member, + cachedStatus, context, isStorageClassChanged, isVolumeSizeChanged) if !recreate { if _, ok := m.Member.Conditions.Get(api.MemberReplacementRequired); ok { @@ -45,9 +52,9 @@ func createMemberRecreationConditionsPlan(ctx context.Context, p = append(p, removeMemberConditionActionV2("Member replacement not required", api.MemberReplacementRequired, m.Group, m.Member.ID)) } } else { - if c, ok := m.Member.Conditions.Get(api.MemberReplacementRequired); !ok || !c.IsTrue() || c.Message != resp { + if c, ok := m.Member.Conditions.Get(api.MemberReplacementRequired); !ok || !c.IsTrue() || c.Message != message { // Update condition - p = append(p, updateMemberConditionActionV2("Member replacement required", api.MemberReplacementRequired, m.Group, m.Member.ID, true, "Member replacement required", resp, "")) + p = append(p, updateMemberConditionActionV2("Member replacement required", api.MemberReplacementRequired, m.Group, m.Member.ID, true, "Member replacement required", message, "")) } } } @@ -59,7 +66,7 @@ type MemberRecreationConditionEvaluator func(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIObject, spec api.DeploymentSpec, status api.DeploymentStatus, group api.ServerGroup, member api.MemberStatus, - cachedStatus inspectorInterface.Inspector, context PlanBuilderContext) (string, bool) + cachedStatus inspectorInterface.Inspector, context PlanBuilderContext) (bool, string, error) func EvaluateMemberRecreationCondition(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIObject, @@ -69,10 +76,170 @@ func EvaluateMemberRecreationCondition(ctx context.Context, args := make([]string, 0, len(evaluators)) for _, e := range evaluators { - if s, ok := e(ctx, log, apiObject, spec, status, group, member, cachedStatus, context); ok { + ok, s, err := e(ctx, log, apiObject, spec, status, group, member, cachedStatus, context) + if err != nil { + // When one of an evaluator requires pod's replacement then it should be done. + continue + } + + if ok { args = append(args, s) } } return strings.Join(args, ", "), len(args) > 0 } + +// isStorageClassChanged returns true and reason when the member should be replaced. +func isStorageClassChanged(_ context.Context, log zerolog.Logger, apiObject k8sutil.APIObject, spec api.DeploymentSpec, + _ api.DeploymentStatus, group api.ServerGroup, member api.MemberStatus, + cachedStatus inspectorInterface.Inspector, context PlanBuilderContext) (bool, string, error) { + if spec.GetMode() == api.DeploymentModeSingle { + // Storage cannot be changed in single server deployments. + return false, "", nil + } + + if member.Phase != api.MemberPhaseCreated { + // Only make changes when phase is created. + return false, "", nil + } + + if member.PersistentVolumeClaimName == "" { + // Plan is irrelevant without PVC. + return false, "", nil + } + + groupSpec := spec.GetServerGroupSpec(group) + storageClassName := groupSpec.GetStorageClassName() + if storageClassName == "" { + // A storage class is not set. + return false, "", nil + } + + // Check if a storage class changed. + if pvc, ok := cachedStatus.PersistentVolumeClaim(member.PersistentVolumeClaimName); !ok { + log.Warn().Str("role", group.AsRole()).Str("id", member.ID).Msg("Failed to get PVC") + return false, "", fmt.Errorf("failed to get PVC %s", member.PersistentVolumeClaimName) + } else { + pvcClassName := util.StringOrDefault(pvc.Spec.StorageClassName) + if pvcClassName == storageClassName { + // A storage class has not been changed. + return false, "", nil + } + if pvcClassName == "" { + // TODO what to do here? + return false, "", nil + } + } + + // From here on, it is known that a storage class has changed. + if group != api.ServerGroupDBServers && group != api.ServerGroupAgents { + // Only agents & DB servers are allowed to change their storage class. + context.CreateEvent(k8sutil.NewCannotChangeStorageClassEvent(apiObject, member.ID, group.AsRole(), "Not supported")) + return false, "", nil + } + + // From here on it is known that the member requires replacement, so `true` must be returned. + // If pod does not exist then it will try next time. + if pod, ok := cachedStatus.Pod(member.PodName); ok { + if _, ok := pod.GetAnnotations()[deployment.ArangoDeploymentPodReplaceAnnotation]; !ok { + log.Warn(). + Str("pod-name", member.PodName). + Str("server-group", group.AsRole()). + Msgf("try changing a storage class name, but %s", getRequiredReplaceMessage(member.PodName)) + // No return here. + } + } else { + return false, "", fmt.Errorf("failed to get pod %s", member.PodName) + } + + return true, "Storage class has changed", nil +} + +// isVolumeSizeChanged returns true and reason when the member should be replaced. +func isVolumeSizeChanged(_ context.Context, log zerolog.Logger, _ k8sutil.APIObject, spec api.DeploymentSpec, + _ api.DeploymentStatus, group api.ServerGroup, member api.MemberStatus, + cachedStatus inspectorInterface.Inspector, _ PlanBuilderContext) (bool, string, error) { + if spec.GetMode() == api.DeploymentModeSingle { + // Storage cannot be changed in single server deployments. + return false, "", nil + } + + if member.Phase != api.MemberPhaseCreated { + // Only make changes when phase is created. + return false, "", nil + } + + if member.PersistentVolumeClaimName == "" { + // Plan is irrelevant without PVC. + return false, "", nil + } + + pvc, ok := cachedStatus.PersistentVolumeClaim(member.PersistentVolumeClaimName) + if !ok { + log.Warn(). + Str("role", group.AsRole()). + Str("id", member.ID). + Msg("Failed to get PVC") + + return false, "", fmt.Errorf("failed to get PVC %s", member.PersistentVolumeClaimName) + } + + groupSpec := spec.GetServerGroupSpec(group) + ok, volumeSize, requestedSize := shouldVolumeResize(groupSpec, pvc) + if !ok { + return false, "", nil + } + + if group != api.ServerGroupDBServers { + log.Error(). + Str("pvc-storage-size", volumeSize.String()). + Str("requested-size", requestedSize.String()). + Msgf("Volume size should not shrink, because it is not possible for \"%s\"", group.AsRole()) + + return false, "", nil + } + + // From here on it is known that the member requires replacement, so `true` must be returned. + // If pod does not exist then it will try next time. + if pod, ok := cachedStatus.Pod(member.PodName); ok { + if _, ok := pod.GetAnnotations()[deployment.ArangoDeploymentPodReplaceAnnotation]; !ok { + log.Warn().Str("pod-name", member.PodName). + Msgf("try shrinking volume size, but %s", getRequiredReplaceMessage(member.PodName)) + // No return here. + } + } else { + return false, "", fmt.Errorf("failed to get pod %s", member.PodName) + } + + return true, "Volume is shrunk", nil +} + +// shouldVolumeResize returns false when a volume should not resize. +// Currently, it is only possible to shrink a volume size. +// When return true then the actual and required volume size are returned. +func shouldVolumeResize(groupSpec api.ServerGroupSpec, + pvc *core.PersistentVolumeClaim) (bool, resource.Quantity, resource.Quantity) { + var res core.ResourceList + if groupSpec.HasVolumeClaimTemplate() { + res = groupSpec.GetVolumeClaimTemplate().Spec.Resources.Requests + } else { + res = groupSpec.Resources.Requests + } + + if requestedSize, ok := res[core.ResourceStorage]; ok { + if volumeSize, ok := pvc.Spec.Resources.Requests[core.ResourceStorage]; ok { + if volumeSize.Cmp(requestedSize) > 0 { + // The actual PVC's volume size is greater than requested size, so it can be shrunk to the requested size. + return true, volumeSize, requestedSize + } + } + } + + return false, resource.Quantity{}, resource.Quantity{} +} + +func getRequiredReplaceMessage(podName string) string { + return fmt.Sprintf("%s annotation is required to be set on the pod %s", + deployment.ArangoDeploymentPodReplaceAnnotation, podName) +} diff --git a/pkg/deployment/reconcile/plan_builder_generator.go b/pkg/deployment/reconcile/plan_builder_generator.go index 7b9710b74..79e08ebed 100644 --- a/pkg/deployment/reconcile/plan_builder_generator.go +++ b/pkg/deployment/reconcile/plan_builder_generator.go @@ -86,7 +86,9 @@ func (d *Reconciler) generatePlan(ctx context.Context, cachedStatus inspectorInt action := result.plan[id] d.context.CreateEvent(k8sutil.NewPlanAppendEvent(d.context.GetAPIObject(), action.Type.String(), action.Group.AsRole(), action.MemberID, action.Reason)) if r := action.Reason; r != "" { - d.log.Info().Str("Action", action.Type.String()).Str("Role", action.Group.AsRole()).Str("Member", action.MemberID).Str("Type", strings.Title(result.planner.Type())).Msgf(r) + d.log.Info().Str("Action", action.Type.String()). + Str("Role", action.Group.AsRole()).Str("Member", action.MemberID). + Str("Type", strings.Title(result.planner.Type())).Msgf(r) } } diff --git a/pkg/deployment/reconcile/plan_builder_normal.go b/pkg/deployment/reconcile/plan_builder_normal.go index 4bdc409c2..1503146d9 100644 --- a/pkg/deployment/reconcile/plan_builder_normal.go +++ b/pkg/deployment/reconcile/plan_builder_normal.go @@ -72,7 +72,6 @@ func createNormalPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil ApplySubPlanIfEmpty(createTLSStatusPropagatedFieldUpdate, createCARenewalPlan). ApplySubPlanIfEmpty(createTLSStatusPropagatedFieldUpdate, createCAAppendPlan). ApplyIfEmpty(createKeyfileRenewalPlan). - ApplyIfEmpty(createRotateServerStoragePlan). ApplyIfEmpty(createRotateServerStorageResizePlan). ApplySubPlanIfEmpty(createTLSStatusPropagatedFieldUpdate, createRotateTLSServerSNIPlan). ApplyIfEmpty(createRestorePlan). diff --git a/pkg/deployment/reconcile/plan_builder_storage.go b/pkg/deployment/reconcile/plan_builder_storage.go index d53445a60..d9d8a0a57 100644 --- a/pkg/deployment/reconcile/plan_builder_storage.go +++ b/pkg/deployment/reconcile/plan_builder_storage.go @@ -23,104 +23,13 @@ package reconcile import ( "context" - "github.com/rs/zerolog" - core "k8s.io/api/core/v1" - api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" - "github.com/arangodb/kube-arangodb/pkg/util" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector" + "github.com/rs/zerolog" + core "k8s.io/api/core/v1" ) -// createRotateServerStoragePlan creates plan to rotate a server and its volume because of a -// different storage class or a difference in storage resource requirements. -func createRotateServerStoragePlan(ctx context.Context, - log zerolog.Logger, apiObject k8sutil.APIObject, - spec api.DeploymentSpec, status api.DeploymentStatus, - cachedStatus inspectorInterface.Inspector, context PlanBuilderContext) api.Plan { - if spec.GetMode() == api.DeploymentModeSingle { - // Storage cannot be changed in single server deployments - return nil - } - var plan api.Plan - status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { - for _, m := range members { - if !plan.IsEmpty() { - // Only 1 change at a time - continue - } - if m.Phase != api.MemberPhaseCreated { - // Only make changes when phase is created - continue - } - if m.PersistentVolumeClaimName == "" { - // Plan is irrelevant without PVC - continue - } - groupSpec := spec.GetServerGroupSpec(group) - storageClassName := groupSpec.GetStorageClassName() - - // Load PVC - pvc, exists := cachedStatus.PersistentVolumeClaim(m.PersistentVolumeClaimName) - if !exists { - log.Warn(). - Str("role", group.AsRole()). - Str("id", m.ID). - Msg("Failed to get PVC") - continue - } - - if util.StringOrDefault(pvc.Spec.StorageClassName) != storageClassName && storageClassName != "" { - // Storageclass has changed - log.Info().Str("pod-name", m.PodName). - Str("pvc-storage-class", util.StringOrDefault(pvc.Spec.StorageClassName)). - Str("group-storage-class", storageClassName).Msg("Storage class has changed - pod needs replacement") - - if group == api.ServerGroupDBServers { - plan = append(plan, - api.NewAction(api.ActionTypeMarkToRemoveMember, group, m.ID)) - } else if group == api.ServerGroupAgents { - plan = append(plan, - api.NewAction(api.ActionTypeKillMemberPod, group, m.ID), - api.NewAction(api.ActionTypeShutdownMember, group, m.ID), - api.NewAction(api.ActionTypeRemoveMember, group, m.ID), - api.NewAction(api.ActionTypeAddMember, group, m.ID), - api.NewAction(api.ActionTypeWaitForMemberUp, group, m.ID), - ) - } else { - // Only agents & dbservers are allowed to change their storage class. - context.CreateEvent(k8sutil.NewCannotChangeStorageClassEvent(apiObject, m.ID, group.AsRole(), "Not supported")) - } - } else { - var res core.ResourceList - if groupSpec.HasVolumeClaimTemplate() { - res = groupSpec.GetVolumeClaimTemplate().Spec.Resources.Requests - } else { - res = groupSpec.Resources.Requests - } - if requestedSize, ok := res[core.ResourceStorage]; ok { - if volumeSize, ok := pvc.Spec.Resources.Requests[core.ResourceStorage]; ok { - cmp := volumeSize.Cmp(requestedSize) - // Only schrink is possible - if cmp > 0 { - - if groupSpec.GetVolumeAllowShrink() && group == api.ServerGroupDBServers && !m.Conditions.IsTrue(api.ConditionTypeMarkedToRemove) { - plan = append(plan, api.NewAction(api.ActionTypeMarkToRemoveMember, group, m.ID)) - } else { - log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()). - Msg("Volume size should not shrink") - } - } - } - } - } - } - return nil - }) - - return plan -} - // createRotateServerStorageResizePlan creates plan to resize storage func createRotateServerStorageResizePlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIObject, diff --git a/pkg/deployment/reconcile/plan_builder_test.go b/pkg/deployment/reconcile/plan_builder_test.go index b38acb87d..ac77e36c7 100644 --- a/pkg/deployment/reconcile/plan_builder_test.go +++ b/pkg/deployment/reconcile/plan_builder_test.go @@ -26,56 +26,44 @@ import ( "io/ioutil" "testing" + monitoring "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + monitoringClient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/typed/monitoring/v1" + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + core "k8s.io/api/core/v1" + policy "k8s.io/api/policy/v1beta1" + apiErrors "k8s.io/apimachinery/pkg/api/errors" + meta "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/kubernetes" + + "github.com/arangodb/arangosync-client/client" + "github.com/arangodb/go-driver" + "github.com/arangodb/go-driver/agency" + + backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1" + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + agencyCache "github.com/arangodb/kube-arangodb/pkg/deployment/agency" "github.com/arangodb/kube-arangodb/pkg/deployment/patch" pod2 "github.com/arangodb/kube-arangodb/pkg/deployment/pod" - - agencyCache "github.com/arangodb/kube-arangodb/pkg/deployment/agency" - + "github.com/arangodb/kube-arangodb/pkg/deployment/reconciler" + "github.com/arangodb/kube-arangodb/pkg/deployment/resources" + "github.com/arangodb/kube-arangodb/pkg/deployment/resources/inspector" + "github.com/arangodb/kube-arangodb/pkg/generated/clientset/versioned" + "github.com/arangodb/kube-arangodb/pkg/util" + "github.com/arangodb/kube-arangodb/pkg/util/arangod/conn" + "github.com/arangodb/kube-arangodb/pkg/util/errors" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" + inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/arangomember" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/persistentvolumeclaim" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/pod" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/poddisruptionbudget" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/secret" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/service" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/serviceaccount" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/servicemonitor" - - "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/secret" - - "github.com/arangodb/kube-arangodb/pkg/generated/clientset/versioned" - monitoringClient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/typed/monitoring/v1" - "k8s.io/client-go/kubernetes" - - "github.com/arangodb/kube-arangodb/pkg/deployment/resources" - - apiErrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/runtime/schema" - - "github.com/arangodb/kube-arangodb/pkg/util/errors" - inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector" - - "github.com/arangodb/kube-arangodb/pkg/util/arangod/conn" - - monitoring "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - - policy "k8s.io/api/policy/v1beta1" - - "github.com/arangodb/kube-arangodb/pkg/deployment/resources/inspector" - - backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1" - - "github.com/arangodb/arangosync-client/client" - "github.com/arangodb/go-driver/agency" - "github.com/rs/zerolog" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - meta "k8s.io/apimachinery/pkg/apis/meta/v1" - - driver "github.com/arangodb/go-driver" - api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" - "github.com/arangodb/kube-arangodb/pkg/deployment/reconciler" - "github.com/arangodb/kube-arangodb/pkg/util" - "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" - core "k8s.io/api/core/v1" ) const pvcName = "pvc_test" @@ -806,6 +794,9 @@ func TestCreatePlan(t *testing.T) { context: &testContext{ ArangoDeployment: deploymentTemplate.DeepCopy(), }, + Pods: map[string]*core.Pod{ + "dbserver1": {}, + }, Helper: func(ad *api.ArangoDeployment) { ad.Spec.DBServers = api.ServerGroupSpec{ Count: util.NewInt(3), @@ -817,18 +808,61 @@ func TestCreatePlan(t *testing.T) { } ad.Status.Members.DBServers[0].Phase = api.MemberPhaseCreated ad.Status.Members.DBServers[0].PersistentVolumeClaimName = pvcName + ad.Status.Members.DBServers[1].Phase = api.MemberPhasePending + ad.Status.Members.DBServers[1].PersistentVolumeClaimName = pvcName + ad.Status.Members.DBServers[2].Phase = api.MemberPhasePending + ad.Status.Members.DBServers[2].PersistentVolumeClaimName = pvcName + ad.Status.Members.Coordinators[0].Phase = api.MemberPhasePending + ad.Status.Members.Coordinators[1].Phase = api.MemberPhasePending + ad.Status.Members.Coordinators[2].Phase = api.MemberPhasePending }, - ExpectedPlan: []api.Action{ - api.NewAction(api.ActionTypeMarkToRemoveMember, api.ServerGroupDBServers, ""), + ExpectedEvent: &k8sutil.Event{ + Type: core.EventTypeNormal, + Reason: "Plan Action added", + Message: "A plan item of type SetMemberConditionV2 for member dbserver with role 1 has been added " + + "with reason: Member replacement required", + }, + ExpectedHighPlan: []api.Action{ + api.NewAction(api.ActionTypeSetMemberConditionV2, api.ServerGroupDBServers, "", "Member replacement required"), + }, + ExpectedLog: "Member replacement required", + }, + { + Name: "Wait for changing Storage for DBServers", + PVCS: map[string]*core.PersistentVolumeClaim{ + pvcName: { + Spec: core.PersistentVolumeClaimSpec{ + StorageClassName: util.NewString("oldStorage"), + }, + }, + }, + context: &testContext{ + ArangoDeployment: deploymentTemplate.DeepCopy(), + }, + Helper: func(ad *api.ArangoDeployment) { + ad.Spec.DBServers = api.ServerGroupSpec{ + Count: util.NewInt(3), + VolumeClaimTemplate: &core.PersistentVolumeClaim{ + Spec: core.PersistentVolumeClaimSpec{ + StorageClassName: util.NewString("newStorage"), + }, + }, + } + ad.Status.Members.DBServers[0].Phase = api.MemberPhaseCreated + ad.Status.Members.DBServers[0].PersistentVolumeClaimName = pvcName + cond := api.Condition{ + Type: api.MemberReplacementRequired, + Status: conditionTrue, + } + ad.Status.Members.DBServers[0].Conditions = append(ad.Status.Members.DBServers[0].Conditions, cond) }, - ExpectedLog: "Storage class has changed - pod needs replacement", }, { Name: "Change Storage for Agents with deprecated storage class name", PVCS: map[string]*core.PersistentVolumeClaim{ pvcName: { Spec: core.PersistentVolumeClaimSpec{ - StorageClassName: util.NewString("oldStorage"), + StorageClassName: util.NewString(""), }, }, }, @@ -842,15 +876,17 @@ func TestCreatePlan(t *testing.T) { } ad.Status.Members.Agents[0].Phase = api.MemberPhaseCreated ad.Status.Members.Agents[0].PersistentVolumeClaimName = pvcName + ad.Status.Members.Agents[1].Phase = api.MemberPhasePending + ad.Status.Members.Agents[1].PersistentVolumeClaimName = pvcName + ad.Status.Members.Agents[2].Phase = api.MemberPhasePending + ad.Status.Members.Agents[2].PersistentVolumeClaimName = pvcName + ad.Status.Members.Coordinators[0].Phase = api.MemberPhasePending + ad.Status.Members.Coordinators[1].Phase = api.MemberPhasePending + ad.Status.Members.Coordinators[2].Phase = api.MemberPhasePending + ad.Status.Members.DBServers[0].Phase = api.MemberPhasePending + ad.Status.Members.DBServers[1].Phase = api.MemberPhasePending + ad.Status.Members.DBServers[2].Phase = api.MemberPhasePending }, - ExpectedPlan: []api.Action{ - api.NewAction(api.ActionTypeKillMemberPod, api.ServerGroupAgents, ""), - api.NewAction(api.ActionTypeShutdownMember, api.ServerGroupAgents, ""), - api.NewAction(api.ActionTypeRemoveMember, api.ServerGroupAgents, ""), - api.NewAction(api.ActionTypeAddMember, api.ServerGroupAgents, ""), - api.NewAction(api.ActionTypeWaitForMemberUp, api.ServerGroupAgents, ""), - }, - ExpectedLog: "Storage class has changed - pod needs replacement", }, { Name: "Storage for Coordinators is not possible", @@ -875,9 +911,16 @@ func TestCreatePlan(t *testing.T) { } ad.Status.Members.Coordinators[0].Phase = api.MemberPhaseCreated ad.Status.Members.Coordinators[0].PersistentVolumeClaimName = pvcName + ad.Status.Members.Coordinators[1].Phase = api.MemberPhasePending + ad.Status.Members.Coordinators[2].Phase = api.MemberPhasePending + ad.Status.Members.DBServers[0].Phase = api.MemberPhasePending + ad.Status.Members.DBServers[1].Phase = api.MemberPhasePending + ad.Status.Members.DBServers[2].Phase = api.MemberPhasePending + ad.Status.Members.Agents[0].Phase = api.MemberPhasePending + ad.Status.Members.Agents[1].Phase = api.MemberPhasePending + ad.Status.Members.Agents[2].Phase = api.MemberPhasePending }, ExpectedPlan: []api.Action{}, - ExpectedLog: "Storage class has changed - pod needs replacement", ExpectedEvent: &k8sutil.Event{ Type: core.EventTypeNormal, Reason: "Coordinator Member StorageClass Cannot Change", diff --git a/pkg/deployment/resources/pvcs.go b/pkg/deployment/resources/pvcs.go index c7263d16b..f5f3f7a75 100644 --- a/pkg/deployment/resources/pvcs.go +++ b/pkg/deployment/resources/pvcs.go @@ -41,7 +41,6 @@ func (r *Resources) createPVCFinalizers(group api.ServerGroup) []string { func (r *Resources) EnsurePVCs(ctx context.Context, cachedStatus inspectorInterface.Inspector) error { apiObject := r.context.GetAPIObject() deploymentName := apiObject.GetName() - ns := apiObject.GetNamespace() owner := apiObject.AsOwner() iterator := r.context.GetServerGroupIterator() status, _ := r.context.GetStatus() @@ -53,17 +52,19 @@ func (r *Resources) EnsurePVCs(ctx context.Context, cachedStatus inspectorInterf continue } - _, exists := cachedStatus.PersistentVolumeClaim(m.PersistentVolumeClaimName) - if exists { + if _, exists := cachedStatus.PersistentVolumeClaim(m.PersistentVolumeClaimName); exists { continue } + storageClassName := spec.GetStorageClassName() role := group.AsRole() resources := spec.Resources vct := spec.VolumeClaimTemplate finalizers := r.createPVCFinalizers(group) err := globals.GetGlobalTimeouts().Kubernetes().RunWithTimeout(ctx, func(ctxChild context.Context) error { - return k8sutil.CreatePersistentVolumeClaim(ctxChild, r.context.PersistentVolumeClaimsModInterface(), m.PersistentVolumeClaimName, deploymentName, ns, storageClassName, role, enforceAntiAffinity, resources, vct, finalizers, owner) + return k8sutil.CreatePersistentVolumeClaim(ctxChild, r.context.PersistentVolumeClaimsModInterface(), + m.PersistentVolumeClaimName, deploymentName, storageClassName, role, enforceAntiAffinity, + resources, vct, finalizers, owner) }) if err != nil { return errors.WithStack(err) diff --git a/pkg/util/k8sutil/events.go b/pkg/util/k8sutil/events.go index fd18ca429..c034544a2 100644 --- a/pkg/util/k8sutil/events.go +++ b/pkg/util/k8sutil/events.go @@ -156,11 +156,11 @@ func NewPlanAppendEvent(apiObject APIObject, itemType, memberID, role, reason st event := newDeploymentEvent(apiObject) event.Type = v1.EventTypeNormal event.Reason = "Plan Action added" - msg := fmt.Sprintf("An plan item of type %s", itemType) + msg := fmt.Sprintf("A plan item of type %s", itemType) if role != "" { msg = fmt.Sprintf("%s for member %s with role %s", msg, memberID, role) } - msg = fmt.Sprintf("%s has beed added", msg) + msg = fmt.Sprintf("%s has been added", msg) if reason != "" { msg = fmt.Sprintf("%s with reason: %s", msg, reason) } @@ -174,7 +174,7 @@ func NewPlanTimeoutEvent(apiObject APIObject, itemType, memberID, role string) * event := newDeploymentEvent(apiObject) event.Type = v1.EventTypeNormal event.Reason = "Reconciliation Plan Timeout" - event.Message = fmt.Sprintf("An plan item of type %s or member %s with role %s did not finish in time", itemType, memberID, role) + event.Message = fmt.Sprintf("A plan item of type %s or member %s with role %s did not finish in time", itemType, memberID, role) return event } @@ -184,7 +184,7 @@ func NewPlanAbortedEvent(apiObject APIObject, itemType, memberID, role string) * event := newDeploymentEvent(apiObject) event.Type = v1.EventTypeNormal event.Reason = "Reconciliation Plan Aborted" - event.Message = fmt.Sprintf("An plan item of type %s or member %s with role %s wants to abort the plan", itemType, memberID, role) + event.Message = fmt.Sprintf("A plan item of type %s or member %s with role %s wants to abort the plan", itemType, memberID, role) return event } diff --git a/pkg/util/k8sutil/pvc.go b/pkg/util/k8sutil/pvc.go index 0faf9aecb..80fa2fcb3 100644 --- a/pkg/util/k8sutil/pvc.go +++ b/pkg/util/k8sutil/pvc.go @@ -78,7 +78,9 @@ func ExtractStorageResourceRequirement(resources v1.ResourceRequirements) v1.Res // CreatePersistentVolumeClaim creates a persistent volume claim with given name and configuration. // If the pvc already exists, nil is returned. // If another error occurs, that error is returned. -func CreatePersistentVolumeClaim(ctx context.Context, pvcs persistentvolumeclaim.ModInterface, pvcName, deploymentName, ns, storageClassName, role string, enforceAntiAffinity bool, resources v1.ResourceRequirements, vct *v1.PersistentVolumeClaim, finalizers []string, owner metav1.OwnerReference) error { +func CreatePersistentVolumeClaim(ctx context.Context, pvcs persistentvolumeclaim.ModInterface, pvcName, deploymentName, + storageClassName, role string, enforceAntiAffinity bool, resources v1.ResourceRequirements, + vct *v1.PersistentVolumeClaim, finalizers []string, owner metav1.OwnerReference) error { labels := LabelsForDeployment(deploymentName, role) volumeMode := v1.PersistentVolumeFilesystem pvc := &v1.PersistentVolumeClaim{