From c829a71ea9228b8e170ef1d2f657d3b4b1467d1d Mon Sep 17 00:00:00 2001 From: Adam Janikowski Date: Thu, 27 Feb 2020 08:28:25 +0100 Subject: [PATCH] Feature/new resize mode (#524) --- CHANGELOG.md | 5 + Dockerfile.ubi | 4 + Makefile | 5 +- .../deployment-operator/cluster-role.yaml | 2 +- pkg/apis/deployment/v1/plan.go | 8 + pkg/apis/deployment/v1/pvc.go | 42 +++++ pkg/apis/deployment/v1/server_group_spec.go | 2 + pkg/deployment/context_impl.go | 26 +++ pkg/deployment/reconcile/action_context.go | 32 +++- pkg/deployment/reconcile/action_pvc_resize.go | 159 ++++++++++++++++++ .../reconcile/action_pvc_resized.go | 106 ++++++++++++ .../reconcile/action_rotate_start_member.go | 128 ++++++++++++++ .../reconcile/action_rotate_stop_member.go | 83 +++++++++ pkg/deployment/reconcile/context.go | 5 + .../reconcile/plan_builder_storage.go | 64 ++++++- pkg/deployment/reconcile/plan_builder_test.go | 8 + pkg/deployment/reconcile/plan_executor.go | 8 + pkg/deployment/reconcile/timeouts.go | 2 + pkg/deployment/resources/pvc_inspector.go | 61 ------- 19 files changed, 681 insertions(+), 69 deletions(-) create mode 100644 Dockerfile.ubi create mode 100644 pkg/apis/deployment/v1/pvc.go create mode 100644 pkg/deployment/reconcile/action_pvc_resize.go create mode 100644 pkg/deployment/reconcile/action_pvc_resized.go create mode 100644 pkg/deployment/reconcile/action_rotate_start_member.go create mode 100644 pkg/deployment/reconcile/action_rotate_stop_member.go diff --git a/CHANGELOG.md b/CHANGELOG.md index ad5c4a303..7304e1ba4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A) +- Add new VolumeResize mode to be compatible with Azure flow +- Allow to customize probe configuration options +- Add new upgrade flag for ArangoDB 3.6.0<= + ## [0.4.3](https://github.com/arangodb/kube-arangodb/tree/0.4.3) (2020-01-31) - Prevent DBServer deletion if there are any shards active on it - Add Maintenance 
mode annotation for ArangoDeployment diff --git a/Dockerfile.ubi b/Dockerfile.ubi new file mode 100644 index 000000000..5b4e5c279 --- /dev/null +++ b/Dockerfile.ubi @@ -0,0 +1,4 @@ +ARG IMAGE=registry.access.redhat.com/ubi8/ubi-minimal:8.0 +FROM ${IMAGE} + +RUN microdnf update && microdnf clean all \ No newline at end of file diff --git a/Makefile b/Makefile index 7e7b66bd7..8a18c00fe 100644 --- a/Makefile +++ b/Makefile @@ -221,14 +221,15 @@ $(BIN): $(SOURCES) dashboard/assets.go VERSION .PHONY: docker docker: check-vars $(BIN) - docker build -f $(DOCKERFILE) --build-arg "VERSION=${VERSION_MAJOR_MINOR_PATCH}" -t $(OPERATORIMAGE) . + docker build --no-cache -f $(DOCKERFILE) --build-arg "VERSION=${VERSION_MAJOR_MINOR_PATCH}" -t $(OPERATORIMAGE) . ifdef PUSHIMAGES docker push $(OPERATORIMAGE) endif .PHONY: docker-ubi docker-ubi: check-vars $(BIN) - docker build -f $(DOCKERFILE) --build-arg "VERSION=${VERSION_MAJOR_MINOR_PATCH}" --build-arg "IMAGE=$(BASEUBIIMAGE)" -t $(OPERATORUBIIMAGE) . + docker build --no-cache -f "$(DOCKERFILE).ubi" --build-arg "VERSION=${VERSION_MAJOR_MINOR_PATCH}" --build-arg "IMAGE=$(BASEUBIIMAGE)" -t $(OPERATORUBIIMAGE)-local-only-build . + docker build --no-cache -f $(DOCKERFILE) --build-arg "VERSION=${VERSION_MAJOR_MINOR_PATCH}" --build-arg "IMAGE=$(OPERATORUBIIMAGE)-local-only-build" -t $(OPERATORUBIIMAGE) . 
ifdef PUSHIMAGES docker push $(OPERATORUBIIMAGE) endif diff --git a/chart/kube-arangodb/templates/deployment-operator/cluster-role.yaml b/chart/kube-arangodb/templates/deployment-operator/cluster-role.yaml index 72f0674a6..57452c53c 100644 --- a/chart/kube-arangodb/templates/deployment-operator/cluster-role.yaml +++ b/chart/kube-arangodb/templates/deployment-operator/cluster-role.yaml @@ -17,7 +17,7 @@ rules: resources: ["customresourcedefinitions"] verbs: ["get", "list", "watch"] - apiGroups: [""] - resources: ["namespaces", "nodes"] + resources: ["namespaces", "nodes", "persistentvolumes"] verbs: ["get", "list"] - apiGroups: ["storage.k8s.io"] resources: ["storageclasses"] diff --git a/pkg/apis/deployment/v1/plan.go b/pkg/apis/deployment/v1/plan.go index 3b52b3672..6ef773c9d 100644 --- a/pkg/apis/deployment/v1/plan.go +++ b/pkg/apis/deployment/v1/plan.go @@ -44,6 +44,10 @@ const ( ActionTypeShutdownMember ActionType = "ShutdownMember" // ActionTypeRotateMember causes a member to be shutdown and have it's pod removed. ActionTypeRotateMember ActionType = "RotateMember" + // ActionTypeRotateStartMember causes a member to be shut down and have its pod removed. It does not wait for the pod to recover. + ActionTypeRotateStartMember ActionType = "RotateStartMember" + // ActionTypeRotateStopMember causes a member to be restored. + ActionTypeRotateStopMember ActionType = "RotateStopMember" // ActionTypeUpgradeMember causes a member to be shutdown and have it's pod removed, restarted with AutoUpgrade option, waited until termination and the restarted again. ActionTypeUpgradeMember ActionType = "UpgradeMember" // ActionTypeWaitForMemberUp causes the plan to wait until the member is considered "up". 
@@ -58,6 +62,10 @@ const ( ActionTypeDisableClusterScaling ActionType = "ScalingDisabled" // ActionTypeEnableClusterScaling turns on scaling DBservers and coordinators ActionTypeEnableClusterScaling ActionType = "ScalingEnabled" + // ActionTypePVCResize resize event for PVC + ActionTypePVCResize ActionType = "PVCResize" + // ActionTypePVCResized waits for PVC to resize for defined time + ActionTypePVCResized ActionType = "PVCResized" ) const ( diff --git a/pkg/apis/deployment/v1/pvc.go b/pkg/apis/deployment/v1/pvc.go new file mode 100644 index 000000000..5aa96c8e2 --- /dev/null +++ b/pkg/apis/deployment/v1/pvc.go @@ -0,0 +1,42 @@ +// +// DISCLAIMER +// +// Copyright 2020 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Adam Janikowski +// + +package v1 + +type PVCResizeMode string + +const ( + PVCResizeModeRuntime PVCResizeMode = "runtime" + PVCResizeModeRotate PVCResizeMode = "rotate" +) + +func (p *PVCResizeMode) Get() PVCResizeMode { + if p == nil { + return PVCResizeModeRuntime + } + + return *p +} + +func (p PVCResizeMode) String() string { + return string(p) +} diff --git a/pkg/apis/deployment/v1/server_group_spec.go b/pkg/apis/deployment/v1/server_group_spec.go index 3485032d1..aa86ff6d2 100644 --- a/pkg/apis/deployment/v1/server_group_spec.go +++ b/pkg/apis/deployment/v1/server_group_spec.go @@ -64,6 +64,8 @@ type ServerGroupSpec struct { PriorityClassName string `json:"priorityClassName,omitempty"` // VolumeClaimTemplate specifies a template for volume claims VolumeClaimTemplate *v1.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"` + // VolumeResizeMode specifies the resize mode for the PVC + VolumeResizeMode *PVCResizeMode `json:"pvcResizeMode,omitempty"` // Sidecars specifies a list of additional containers to be started Sidecars []v1.Container `json:"sidecars,omitempty"` } diff --git a/pkg/deployment/context_impl.go b/pkg/deployment/context_impl.go index 8bab730d9..16db3cbbb 100644 --- a/pkg/deployment/context_impl.go +++ b/pkg/deployment/context_impl.go @@ -25,6 +25,7 @@ package deployment import ( "context" "fmt" + "k8s.io/apimachinery/pkg/api/errors" "net" "strconv" @@ -309,6 +310,31 @@ func (d *Deployment) DeletePvc(pvcName string) error { return nil } +// UpdatePvc updates a persistent volume claim in the namespace +// of the deployment. If the pvc does not exist, the error is ignored. 
+func (d *Deployment) UpdatePvc(pvc *v1.PersistentVolumeClaim) error { + _, err := d.GetKubeCli().CoreV1().PersistentVolumeClaims(d.GetNamespace()).Update(pvc) + if err == nil { + return nil + } + + if errors.IsNotFound(err) { + return nil + } + + return maskAny(err) +} + +// GetPv returns PV info about PV with given name. +func (d *Deployment) GetPv(pvName string) (*v1.PersistentVolume, error) { + pv, err := d.GetKubeCli().CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{}) + if err == nil { + return pv, nil + } + + return nil, maskAny(err) +} + // GetOwnedPods returns a list of all pods owned by the deployment. func (d *Deployment) GetOwnedPods() ([]v1.Pod, error) { // Get all current pods diff --git a/pkg/deployment/reconcile/action_context.go b/pkg/deployment/reconcile/action_context.go index a341a12fa..14efd83ac 100644 --- a/pkg/deployment/reconcile/action_context.go +++ b/pkg/deployment/reconcile/action_context.go @@ -25,9 +25,9 @@ package reconcile import ( "context" "fmt" - v1 "k8s.io/api/core/v1" - "github.com/arangodb/go-driver/agency" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" + v1 "k8s.io/api/core/v1" "github.com/arangodb/arangosync-client/client" driver "github.com/arangodb/go-driver" @@ -40,6 +40,8 @@ import ( // ActionContext provides methods to the Action implementations // to control their context. type ActionContext interface { + // GetAPIObject returns the deployment as k8s object. + GetAPIObject() k8sutil.APIObject // Gets the specified mode of deployment GetMode() api.DeploymentMode // GetDatabaseClient returns a cached client for the entire database (cluster coordinators or single server), @@ -53,6 +55,9 @@ type ActionContext interface { GetAgency(ctx context.Context) (agency.Agency, error) // GetSyncServerClient returns a cached client for a specific arangosync server. GetSyncServerClient(ctx context.Context, group api.ServerGroup, id string) (client.API, error) + // CreateEvent creates a given event. 
+ // On error, the error is logged. + CreateEvent(evt *k8sutil.Event) // GetMemberStatusByID returns the current member status // for the member with given id. // Returns member status, true when found, or false @@ -74,6 +79,11 @@ type ActionContext interface { // GetPvc returns PVC info about PVC with given name in the namespace // of the deployment. GetPvc(pvcName string) (*v1.PersistentVolumeClaim, error) + // GetPv returns PV info about PV with given name. + GetPv(pvName string) (*v1.PersistentVolume, error) + // UpdatePvc update PVC with given name in the namespace + // of the deployment. + UpdatePvc(pvc *v1.PersistentVolumeClaim) error // RemovePodFinalizers removes all the finalizers from the Pod with given name in the namespace // of the deployment. If the pod does not exist, the error is ignored. RemovePodFinalizers(podName string) error @@ -103,7 +113,7 @@ type ActionContext interface { // newActionContext creates a new ActionContext implementation. func newActionContext(log zerolog.Logger, context Context) ActionContext { return &actionContext{ - log: log, + log: log, context: context, } } @@ -114,6 +124,22 @@ type actionContext struct { context Context } +func (ac *actionContext) GetPv(pvName string) (*v1.PersistentVolume, error) { + return ac.context.GetPv(pvName) +} + +func (ac *actionContext) GetAPIObject() k8sutil.APIObject { + return ac.context.GetAPIObject() +} + +func (ac *actionContext) UpdatePvc(pvc *v1.PersistentVolumeClaim) error { + return ac.context.UpdatePvc(pvc) +} + +func (ac *actionContext) CreateEvent(evt *k8sutil.Event) { + ac.context.CreateEvent(evt) +} + func (ac *actionContext) GetPvc(pvcName string) (*v1.PersistentVolumeClaim, error) { return ac.context.GetPvc(pvcName) } diff --git a/pkg/deployment/reconcile/action_pvc_resize.go b/pkg/deployment/reconcile/action_pvc_resize.go new file mode 100644 index 000000000..a82a59dcb --- /dev/null +++ b/pkg/deployment/reconcile/action_pvc_resize.go @@ -0,0 +1,159 @@ +// +// DISCLAIMER +// 
+// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package reconcile + +import ( + "context" + "time" + + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" + core "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/rs/zerolog" +) + +// NewPVCResizeAction creates a new Action that implements the given +// planned PVCResize action. +func NewPVCResizeAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action { + return &actionPVCResize{ + log: log, + action: action, + actionCtx: actionCtx, + } +} + +// actionPVCResize implements a PVCResize action. +type actionPVCResize struct { + log zerolog.Logger + action api.Action + actionCtx ActionContext +} + +// Start performs the start of the action. +// Returns true if the action is completely finished, false in case +// the start time needs to be recorded and a ready condition needs to be checked. 
+func (a *actionPVCResize) Start(ctx context.Context) (bool, error) { + log := a.log + group := a.action.Group + groupSpec := a.actionCtx.GetSpec().GetServerGroupSpec(group) + m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID) + if !ok { + log.Error().Msg("No such member") + return true, nil + } + + if m.PersistentVolumeClaimName == "" { + // Nothing to do, PVC is empty + return true, nil + } + + pvc, err := a.actionCtx.GetPvc(m.PersistentVolumeClaimName) + if err != nil { + if errors.IsNotFound(err) { + return true, nil + } + + return false, err + } + + var res core.ResourceList + if groupSpec.HasVolumeClaimTemplate() { + res = groupSpec.GetVolumeClaimTemplate().Spec.Resources.Requests + } else { + res = groupSpec.Resources.Requests + } + + if requestedSize, ok := res[core.ResourceStorage]; ok { + if volumeSize, ok := pvc.Spec.Resources.Requests[core.ResourceStorage]; ok { + cmp := volumeSize.Cmp(requestedSize) + if cmp < 0 { + pvc.Spec.Resources.Requests[core.ResourceStorage] = requestedSize + if err := a.actionCtx.UpdatePvc(pvc); err != nil { + return false, err + } + + return false, nil + } else if cmp > 0 { + log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()). + Msg("Volume size should not shrink") + a.actionCtx.CreateEvent(k8sutil.NewCannotShrinkVolumeEvent(a.actionCtx.GetAPIObject(), pvc.Name)) + return false, nil + } + } + } + + return true, nil +} + +// CheckProgress checks the progress of the action. +// Returns: ready, abort, error. 
+func (a *actionPVCResize) CheckProgress(ctx context.Context) (bool, bool, error) { + // Check that the bound PV has reached the size requested by the PVC + log := a.log + m, found := a.actionCtx.GetMemberStatusByID(a.action.MemberID) + if !found { + log.Error().Msg("No such member") + return true, false, nil + } + + pvc, err := a.actionCtx.GetPvc(m.PersistentVolumeClaimName) + if err != nil { + if errors.IsNotFound(err) { + return true, false, nil + } + + return false, true, err + } + + pv, err := a.actionCtx.GetPv(pvc.Spec.VolumeName) + if err != nil { + if errors.IsNotFound(err) { + return true, false, nil + } + + return false, true, err + } + + if requestedSize, ok := pvc.Spec.Resources.Requests[core.ResourceStorage]; ok { + if volumeSize, ok := pv.Spec.Capacity[core.ResourceStorage]; ok { + cmp := volumeSize.Cmp(requestedSize) + if cmp >= 0 { + return true, false, nil + } + } + } + + return false, false, nil +} + +// Timeout returns the amount of time after which this action will timeout. +func (a *actionPVCResize) Timeout() time.Duration { + return pvcResizeTimeout +} + +// Return the MemberID used / created in this action +func (a *actionPVCResize) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/action_pvc_resized.go b/pkg/deployment/reconcile/action_pvc_resized.go new file mode 100644 index 000000000..bd00c3a5b --- /dev/null +++ b/pkg/deployment/reconcile/action_pvc_resized.go @@ -0,0 +1,106 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package reconcile + +import ( + "context" + "time" + + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" + core "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/rs/zerolog" +) + +// NewPVCResizedAction creates a new Action that implements the given +// planned PVCResized action. +func NewPVCResizedAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action { + return &actionPVCResized{ + log: log, + action: action, + actionCtx: actionCtx, + } +} + +// actionPVCResized implements a PVCResized action. +type actionPVCResized struct { + log zerolog.Logger + action api.Action + actionCtx ActionContext +} + +// Start performs the start of the action. +// Returns true if the action is completely finished, false in case +// the start time needs to be recorded and a ready condition needs to be checked. +func (a *actionPVCResized) Start(ctx context.Context) (bool, error) { + return false, nil +} + +// CheckProgress checks the progress of the action. +// Returns: ready, abort, error. 
+func (a *actionPVCResized) CheckProgress(ctx context.Context) (bool, bool, error) { + // Check that the PVC status reports the requested capacity + log := a.log + m, found := a.actionCtx.GetMemberStatusByID(a.action.MemberID) + if !found { + log.Error().Msg("No such member") + return true, false, nil + } + + pvc, err := a.actionCtx.GetPvc(m.PersistentVolumeClaimName) + if err != nil { + if errors.IsNotFound(err) { + return true, false, nil + } + + return false, true, err + } + + // If we are pending for FS to be resized - we need to proceed with mounting of PVC + if k8sutil.IsPersistentVolumeClaimFileSystemResizePending(pvc) { + return true, false, nil + } + + if requestedSize, ok := pvc.Spec.Resources.Requests[core.ResourceStorage]; ok { + if volumeSize, ok := pvc.Status.Capacity[core.ResourceStorage]; ok { + cmp := volumeSize.Cmp(requestedSize) + if cmp >= 0 { + return true, false, nil + } + } + } + + return false, false, nil +} + +// Timeout returns the amount of time after which this action will timeout. +func (a *actionPVCResized) Timeout() time.Duration { + return pvcResizedTimeout +} + +// Return the MemberID used / created in this action +func (a *actionPVCResized) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/action_rotate_start_member.go b/pkg/deployment/reconcile/action_rotate_start_member.go new file mode 100644 index 000000000..885d5041b --- /dev/null +++ b/pkg/deployment/reconcile/action_rotate_start_member.go @@ -0,0 +1,128 @@ +// +// DISCLAIMER +// +// Copyright 2020 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Adam Janikowski +// + +package reconcile + +import ( + "context" + "time" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/rs/zerolog" +) + +// NewRotateStartMemberAction creates a new Action that implements the given +// planned RotateStartMember action. +func NewRotateStartMemberAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action { + return &actionRotateStartMember{ + log: log, + action: action, + actionCtx: actionCtx, + } +} + +// actionRotateStartMember implements an RotateStartMember. +type actionRotateStartMember struct { + log zerolog.Logger + action api.Action + actionCtx ActionContext +} + +// Start performs the start of the action. +// Returns true if the action is completely finished, false in case +// the start time needs to be recorded and a ready condition needs to be checked. 
+func (a *actionRotateStartMember) Start(ctx context.Context) (bool, error) { + log := a.log + group := a.action.Group + m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID) + if !ok { + log.Error().Msg("No such member") + } + // Remove finalizers, so Kubernetes will quickly terminate the pod + if err := a.actionCtx.RemovePodFinalizers(m.PodName); err != nil { + return false, maskAny(err) + } + if group.IsArangod() { + // Invoke shutdown endpoint + c, err := a.actionCtx.GetServerClient(ctx, group, a.action.MemberID) + if err != nil { + log.Debug().Err(err).Msg("Failed to create member client") + return false, maskAny(err) + } + removeFromCluster := false + log.Debug().Bool("removeFromCluster", removeFromCluster).Msg("Shutting down member") + ctx, cancel := context.WithTimeout(ctx, shutdownTimeout) + defer cancel() + if err := c.Shutdown(ctx, removeFromCluster); err != nil { + // Shutdown failed. Let's check if we're already done + if ready, _, err := a.CheckProgress(ctx); err == nil && ready { + // We're done + return true, nil + } + log.Debug().Err(err).Msg("Failed to shutdown member") + return false, maskAny(err) + } + } else if group.IsArangosync() { + // Terminate pod + if err := a.actionCtx.DeletePod(m.PodName); err != nil { + return false, maskAny(err) + } + } + // Update status + m.Phase = api.MemberPhaseRotating + + if err := a.actionCtx.UpdateMember(m); err != nil { + return false, maskAny(err) + } + return false, nil +} + +// CheckProgress checks the progress of the action. +// Returns: ready, abort, error. 
+func (a *actionRotateStartMember) CheckProgress(ctx context.Context) (bool, bool, error) { + // Check that pod is removed + log := a.log + m, found := a.actionCtx.GetMemberStatusByID(a.action.MemberID) + if !found { + log.Error().Msg("No such member") + return true, false, nil + } + if !m.Conditions.IsTrue(api.ConditionTypeTerminated) { + // Pod is not yet terminated + return false, false, nil + } + // Pod is terminated, we can now remove it + if err := a.actionCtx.DeletePod(m.PodName); err != nil { + return false, false, maskAny(err) + } + return true, false, nil +} + +// Timeout returns the amount of time after which this action will timeout. +func (a *actionRotateStartMember) Timeout() time.Duration { + return rotateMemberTimeout +} + +// Return the MemberID used / created in this action +func (a *actionRotateStartMember) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/action_rotate_stop_member.go b/pkg/deployment/reconcile/action_rotate_stop_member.go new file mode 100644 index 000000000..428b40817 --- /dev/null +++ b/pkg/deployment/reconcile/action_rotate_stop_member.go @@ -0,0 +1,83 @@ +// +// DISCLAIMER +// +// Copyright 2020 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Adam Janikowski +// + +package reconcile + +import ( + "context" + "time" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" + "github.com/rs/zerolog" +) + +// NewRotateStopMemberAction creates a new Action that implements the given +// planned RotateStopMember action. +func NewRotateStopMemberAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action { + return &actionRotateStopMember{ + log: log, + action: action, + actionCtx: actionCtx, + } +} + +// actionRotateStopMember implements an RotateStopMember. +type actionRotateStopMember struct { + log zerolog.Logger + action api.Action + actionCtx ActionContext +} + +// Start performs the start of the action. +// Returns true if the action is completely finished, false in case +// the start time needs to be recorded and a ready condition needs to be checked. +func (a *actionRotateStopMember) Start(ctx context.Context) (bool, error) { + log := a.log + m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID) + if !ok { + log.Error().Msg("No such member") + } + + m.Phase = api.MemberPhaseNone + m.RecentTerminations = nil // Since we're rotating, we do not care about old terminations. + m.CleanoutJobID = "" + if err := a.actionCtx.UpdateMember(m); err != nil { + return false, maskAny(err) + } + return false, nil +} + +// CheckProgress checks the progress of the action. +// Returns: ready, abort, error. +func (a *actionRotateStopMember) CheckProgress(ctx context.Context) (bool, bool, error) { + return true, false, nil +} + +// Timeout returns the amount of time after which this action will timeout. 
+func (a *actionRotateStopMember) Timeout() time.Duration { + return rotateMemberTimeout +} + +// Return the MemberID used / created in this action +func (a *actionRotateStopMember) MemberID() string { + return a.action.MemberID +} diff --git a/pkg/deployment/reconcile/context.go b/pkg/deployment/reconcile/context.go index d7f796ddf..0baffcffa 100644 --- a/pkg/deployment/reconcile/context.go +++ b/pkg/deployment/reconcile/context.go @@ -77,8 +77,13 @@ type Context interface { RemovePodFinalizers(podName string) error // GetOwnedPods returns a list of all pods owned by the deployment. GetOwnedPods() ([]v1.Pod, error) + // UpdatePvc update PVC with given name in the namespace + // of the deployment. + UpdatePvc(pvc *v1.PersistentVolumeClaim) error // GetPvc gets a PVC by the given name, in the samespace of the deployment. GetPvc(pvcName string) (*v1.PersistentVolumeClaim, error) + // GetPv returns PV info about PV with given name. + GetPv(pvName string) (*v1.PersistentVolume, error) // GetTLSKeyfile returns the keyfile encoded TLS certificate+key for // the given member. GetTLSKeyfile(group api.ServerGroup, member api.MemberStatus) (string, error) diff --git a/pkg/deployment/reconcile/plan_builder_storage.go b/pkg/deployment/reconcile/plan_builder_storage.go index 7e98549dd..6708f2556 100644 --- a/pkg/deployment/reconcile/plan_builder_storage.go +++ b/pkg/deployment/reconcile/plan_builder_storage.go @@ -24,7 +24,7 @@ package reconcile import ( "github.com/rs/zerolog" - v1 "k8s.io/api/core/v1" + core "k8s.io/api/core/v1" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" "github.com/arangodb/kube-arangodb/pkg/util" @@ -34,7 +34,7 @@ import ( // createRotateServerStoragePlan creates plan to rotate a server and its volume because of a // different storage class or a difference in storage resource requirements. 
func createRotateServerStoragePlan(log zerolog.Logger, apiObject k8sutil.APIObject, spec api.DeploymentSpec, status api.DeploymentStatus, - getPVC func(pvcName string) (*v1.PersistentVolumeClaim, error), + getPVC func(pvcName string) (*core.PersistentVolumeClaim, error), createEvent func(evt *k8sutil.Event)) api.Plan { if spec.GetMode() == api.DeploymentModeSingle { // Storage cannot be changed in single server deployments @@ -97,9 +97,69 @@ func createRotateServerStoragePlan(log zerolog.Logger, apiObject k8sutil.APIObje } else if k8sutil.IsPersistentVolumeClaimFileSystemResizePending(pvc) { // rotation needed plan = createRotateMemberPlan(log, m, group, "Filesystem resize pending") + } else { + if groupSpec.HasVolumeClaimTemplate() { + res := groupSpec.GetVolumeClaimTemplate().Spec.Resources.Requests + // For pvc only resources.requests is mutable + if comparePVCResourceList(pvc.Spec.Resources.Requests, res) { + plan = append(plan, pvcResizePlan(log, group, groupSpec, m.ID)...) + } + } else { + if requestedSize, ok := groupSpec.Resources.Requests[core.ResourceStorage]; ok { + if volumeSize, ok := pvc.Spec.Resources.Requests[core.ResourceStorage]; ok { + cmp := volumeSize.Cmp(requestedSize) + if cmp < 0 { + plan = append(plan, pvcResizePlan(log, group, groupSpec, m.ID)...) + } else if cmp > 0 { + log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()). 
+										Msg("Volume size should not shrink")
+							}
+						}
+					}
+				}
 			}
 		}
 		return nil
 	})
 	return plan
 }
+
+// pvcResizePlan builds the plan that resizes the PVC of the member with the
+// given ID, based on the volume resize mode configured in groupSpec:
+//   - PVCResizeModeRuntime: a single PVCResize action.
+//   - PVCResizeModeRotate: RotateStartMember, PVCResize, PVCResized,
+//     RotateStopMember and WaitForMemberUp, in that order.
+//
+// For any other mode an error is logged and nil is returned, so no actions
+// are planned.
+func pvcResizePlan(log zerolog.Logger, group api.ServerGroup, groupSpec api.ServerGroupSpec, memberID string) api.Plan {
+	mode := groupSpec.VolumeResizeMode.Get()
+	switch mode {
+	case api.PVCResizeModeRuntime:
+		return api.Plan{
+			api.NewAction(api.ActionTypePVCResize, group, memberID),
+		}
+	case api.PVCResizeModeRotate:
+		return api.Plan{
+			api.NewAction(api.ActionTypeRotateStartMember, group, memberID),
+			api.NewAction(api.ActionTypePVCResize, group, memberID),
+			api.NewAction(api.ActionTypePVCResized, group, memberID),
+			api.NewAction(api.ActionTypeRotateStopMember, group, memberID),
+			api.NewAction(api.ActionTypeWaitForMemberUp, group, memberID),
+		}
+	default:
+		log.Error().Str("server-group", group.AsRole()).Str("mode", mode.String()).
+			Msg("Requested mode is not supported")
+		return nil
+	}
+}
+
+// comparePVCResourceList reports whether the two resource lists differ.
+// It returns true when a resource in wanted is missing from given or has a
+// different quantity, or when given holds a resource that wanted does not.
+// It returns false only when both lists hold the same resources with equal
+// quantities.
+func comparePVCResourceList(wanted, given core.ResourceList) bool {
+	for k, v := range wanted {
+		if gv, ok := given[k]; !ok {
+			return true
+		} else if v.Cmp(gv) != 0 {
+			return true
+		}
+	}
+
+	for k := range given {
+		if _, ok := wanted[k]; !ok {
+			return true
+		}
+	}
+
+	return false
+}
diff --git a/pkg/deployment/reconcile/plan_builder_test.go b/pkg/deployment/reconcile/plan_builder_test.go
index e97caa4cf..7fa9967dd 100644
--- a/pkg/deployment/reconcile/plan_builder_test.go
+++ b/pkg/deployment/reconcile/plan_builder_test.go
@@ -54,6 +54,14 @@ type testContext struct {
 	RecordedEvent *k8sutil.Event
 }
 
+func (c *testContext) UpdatePvc(pvc *core.PersistentVolumeClaim) error {
+	panic("implement me")
+}
+
+func (c *testContext) GetPv(pvName string) (*core.PersistentVolume, error) {
+	panic("implement me")
+}
+
 func (c *testContext) GetAgencyData(ctx context.Context, i interface{}, keyParts ...string) error {
 	return nil
 }
diff --git a/pkg/deployment/reconcile/plan_executor.go b/pkg/deployment/reconcile/plan_executor.go
index 6cc7b81de..9d7cd6c9b 100644
--- 
a/pkg/deployment/reconcile/plan_executor.go +++ b/pkg/deployment/reconcile/plan_executor.go @@ -175,6 +175,10 @@ func (d *Reconciler) createAction(ctx context.Context, log zerolog.Logger, actio return NewShutdownMemberAction(log, action, actionCtx) case api.ActionTypeRotateMember: return NewRotateMemberAction(log, action, actionCtx) + case api.ActionTypeRotateStartMember: + return NewRotateStartMemberAction(log, action, actionCtx) + case api.ActionTypeRotateStopMember: + return NewRotateStopMemberAction(log, action, actionCtx) case api.ActionTypeUpgradeMember: return NewUpgradeMemberAction(log, action, actionCtx) case api.ActionTypeWaitForMemberUp: @@ -189,6 +193,10 @@ func (d *Reconciler) createAction(ctx context.Context, log zerolog.Logger, actio return NewDisableScalingCluster(log, action, actionCtx) case api.ActionTypeEnableClusterScaling: return NewEnableScalingCluster(log, action, actionCtx) + case api.ActionTypePVCResize: + return NewPVCResizeAction(log, action, actionCtx) + case api.ActionTypePVCResized: + return NewPVCResizedAction(log, action, actionCtx) default: panic(fmt.Sprintf("Unknown action type '%s'", action.Type)) } diff --git a/pkg/deployment/reconcile/timeouts.go b/pkg/deployment/reconcile/timeouts.go index a70ab895b..aadeff710 100644 --- a/pkg/deployment/reconcile/timeouts.go +++ b/pkg/deployment/reconcile/timeouts.go @@ -32,6 +32,8 @@ const ( renewTLSCertificateTimeout = time.Minute * 30 renewTLSCACertificateTimeout = time.Minute * 30 rotateMemberTimeout = time.Minute * 15 + pvcResizeTimeout = time.Minute * 15 + pvcResizedTimeout = time.Minute * 15 shutdownMemberTimeout = time.Minute * 30 upgradeMemberTimeout = time.Hour * 6 waitForMemberUpTimeout = time.Minute * 15 diff --git a/pkg/deployment/resources/pvc_inspector.go b/pkg/deployment/resources/pvc_inspector.go index fd21deb18..2c24edfbb 100644 --- a/pkg/deployment/resources/pvc_inspector.go +++ b/pkg/deployment/resources/pvc_inspector.go @@ -29,7 +29,6 @@ import ( 
"github.com/arangodb/kube-arangodb/pkg/metrics" "github.com/arangodb/kube-arangodb/pkg/util" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" - apiv1 "k8s.io/api/core/v1" ) var ( @@ -58,7 +57,6 @@ func (r *Resources) InspectPVCs(ctx context.Context) (util.Interval, error) { // Update member status from all pods found status, _ := r.context.GetStatus() - spec := r.context.GetSpec() for _, p := range pvcs { // PVC belongs to this deployment, update metric inspectedPVCsCounters.WithLabelValues(deploymentName).Inc() @@ -81,47 +79,6 @@ func (r *Resources) InspectPVCs(ctx context.Context) (util.Interval, error) { continue } - // Resize inspector - groupSpec := spec.GetServerGroupSpec(group) - - if groupSpec.HasVolumeClaimTemplate() { - res := groupSpec.GetVolumeClaimTemplate().Spec.Resources.Requests - // For pvc only resources.requests is mutable - if compareResourceList(p.Spec.Resources.Requests, res) { - p.Spec.Resources.Requests = res - log.Debug().Msg("volumeClaimTemplate requested resources changed - updating") - kube := r.context.GetKubeCli() - if _, err := kube.CoreV1().PersistentVolumeClaims(r.context.GetNamespace()).Update(&p); err != nil { - log.Error().Err(err).Msg("Failed to update pvc") - } else { - r.context.CreateEvent(k8sutil.NewPVCResizedEvent(r.context.GetAPIObject(), p.Name)) - } - } - } else { - if requestedSize, ok := groupSpec.Resources.Requests[apiv1.ResourceStorage]; ok { - if volumeSize, ok := p.Spec.Resources.Requests[apiv1.ResourceStorage]; ok { - cmp := volumeSize.Cmp(requestedSize) - if cmp < 0 { - // Size of the volume is smaller than the requested size - // Update the pvc with the request size - p.Spec.Resources.Requests[apiv1.ResourceStorage] = requestedSize - - log.Debug().Str("pvc-capacity", volumeSize.String()).Str("requested", requestedSize.String()).Msg("PVC capacity differs - updating") - kube := r.context.GetKubeCli() - if _, err := kube.CoreV1().PersistentVolumeClaims(r.context.GetNamespace()).Update(&p); err != nil { - 
log.Error().Err(err).Msg("Failed to update pvc") - } else { - r.context.CreateEvent(k8sutil.NewPVCResizedEvent(r.context.GetAPIObject(), p.Name)) - } - } else if cmp > 0 { - log.Error().Str("server-group", group.AsRole()).Str("pvc-storage-size", volumeSize.String()).Str("requested-size", requestedSize.String()). - Msg("Volume size should not shrink") - r.context.CreateEvent(k8sutil.NewCannotShrinkVolumeEvent(r.context.GetAPIObject(), p.Name)) - } - } - } - } - if k8sutil.IsPersistentVolumeClaimMarkedForDeletion(&p) { // Process finalizers if x, err := r.runPVCFinalizers(ctx, &p, group, memberStatus); err != nil { @@ -135,21 +92,3 @@ func (r *Resources) InspectPVCs(ctx context.Context) (util.Interval, error) { return nextInterval, nil } - -func compareResourceList(wanted, given apiv1.ResourceList) bool { - for k, v := range wanted { - if gv, ok := given[k]; !ok { - return true - } else if v.Cmp(gv) != 0 { - return true - } - } - - for k := range given { - if _, ok := wanted[k]; !ok { - return true - } - } - - return false -}