1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Feature] Allow to recreate Local volumes (#1319)

This commit is contained in:
Adam Janikowski 2023-05-31 13:16:14 +02:00 committed by GitHub
parent 64cfaaf68c
commit c6db96dd88
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 281 additions and 2 deletions

View file

@ -11,6 +11,7 @@
- (Bugfix) Fix creating sync components with EA type set to Managed and headless svc
- (Feature) Check if Volume with LocalStorage is missing
- (Feature) Add disallowConcurrent option to ArangoBackupPolicy
- (Feature) Allow to recreate Local volumes
## [1.2.27](https://github.com/arangodb/kube-arangodb/tree/1.2.27) (2023-04-27)
- (Feature) Add InSync Cache

View file

@ -51,6 +51,7 @@
| RecreateMember | no | 15m0s | no | Community & Enterprise | Recreate member with same ID and Data |
| RefreshTLSKeyfileCertificate | no | 30m0s | no | Enterprise Only | Recreate Server TLS Certificate secret |
| RemoveMember | no | 15m0s | no | Community & Enterprise | Removes member from the Cluster and Status |
| RemoveMemberPVC | no | 15m0s | no | Community & Enterprise | Removes member PVC and enforces recreate procedure |
| RenewTLSCACertificate | no | 30m0s | no | Enterprise Only | Recreate Managed CA secret |
| RenewTLSCertificate | no | 30m0s | no | Enterprise Only | Recreate Server TLS Certificate secret |
| ResignLeadership | no | 30m0s | yes | Community & Enterprise | Run the ResignLeadership job on DBServer |
@ -139,6 +140,7 @@ spec:
RecreateMember: 15m0s
RefreshTLSKeyfileCertificate: 30m0s
RemoveMember: 15m0s
RemoveMemberPVC: 15m0s
RenewTLSCACertificate: 30m0s
RenewTLSCertificate: 30m0s
ResignLeadership: 30m0s

View file

@ -12,6 +12,9 @@ actions:
RemoveMember:
description: Removes member from the Cluster and Status
timeout: 15m
RemoveMemberPVC:
description: Removes member PVC and enforces recreate procedure
timeout: 15m
RecreateMember:
description: Recreate member with same ID and Data
timeout: 15m

View file

@ -117,6 +117,8 @@ const (
ActionRefreshTLSKeyfileCertificateDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
// ActionRemoveMemberDefaultTimeout define default timeout for action ActionRemoveMember
ActionRemoveMemberDefaultTimeout time.Duration = 900 * time.Second // 15m0s
// ActionRemoveMemberPVCDefaultTimeout define default timeout for action ActionRemoveMemberPVC
ActionRemoveMemberPVCDefaultTimeout time.Duration = 900 * time.Second // 15m0s
// ActionRenewTLSCACertificateDefaultTimeout define default timeout for action ActionRenewTLSCACertificate
ActionRenewTLSCACertificateDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
// ActionRenewTLSCertificateDefaultTimeout define default timeout for action ActionRenewTLSCertificate
@ -276,6 +278,8 @@ const (
ActionTypeRefreshTLSKeyfileCertificate ActionType = "RefreshTLSKeyfileCertificate"
// ActionTypeRemoveMember in scopes Normal. Removes member from the Cluster and Status
ActionTypeRemoveMember ActionType = "RemoveMember"
// ActionTypeRemoveMemberPVC in scopes Normal. Removes member PVC and enforces recreate procedure
ActionTypeRemoveMemberPVC ActionType = "RemoveMemberPVC"
// ActionTypeRenewTLSCACertificate in scopes Normal. Recreate Managed CA secret
ActionTypeRenewTLSCACertificate ActionType = "RenewTLSCACertificate"
// ActionTypeRenewTLSCertificate in scopes Normal. Recreate Server TLS Certificate secret
@ -436,6 +440,8 @@ func (a ActionType) DefaultTimeout() time.Duration {
return ActionRefreshTLSKeyfileCertificateDefaultTimeout
case ActionTypeRemoveMember:
return ActionRemoveMemberDefaultTimeout
case ActionTypeRemoveMemberPVC:
return ActionRemoveMemberPVCDefaultTimeout
case ActionTypeRenewTLSCACertificate:
return ActionRenewTLSCACertificateDefaultTimeout
case ActionTypeRenewTLSCertificate:
@ -600,6 +606,8 @@ func (a ActionType) Priority() ActionPriority {
return ActionPriorityNormal
case ActionTypeRemoveMember:
return ActionPriorityNormal
case ActionTypeRemoveMemberPVC:
return ActionPriorityNormal
case ActionTypeRenewTLSCACertificate:
return ActionPriorityNormal
case ActionTypeRenewTLSCertificate:
@ -774,6 +782,8 @@ func (a ActionType) Optional() bool {
return false
case ActionTypeRemoveMember:
return false
case ActionTypeRemoveMemberPVC:
return false
case ActionTypeRenewTLSCACertificate:
return false
case ActionTypeRenewTLSCertificate:

View file

@ -117,6 +117,8 @@ const (
ActionRefreshTLSKeyfileCertificateDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
// ActionRemoveMemberDefaultTimeout define default timeout for action ActionRemoveMember
ActionRemoveMemberDefaultTimeout time.Duration = 900 * time.Second // 15m0s
// ActionRemoveMemberPVCDefaultTimeout define default timeout for action ActionRemoveMemberPVC
ActionRemoveMemberPVCDefaultTimeout time.Duration = 900 * time.Second // 15m0s
// ActionRenewTLSCACertificateDefaultTimeout define default timeout for action ActionRenewTLSCACertificate
ActionRenewTLSCACertificateDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
// ActionRenewTLSCertificateDefaultTimeout define default timeout for action ActionRenewTLSCertificate
@ -276,6 +278,8 @@ const (
ActionTypeRefreshTLSKeyfileCertificate ActionType = "RefreshTLSKeyfileCertificate"
// ActionTypeRemoveMember in scopes Normal. Removes member from the Cluster and Status
ActionTypeRemoveMember ActionType = "RemoveMember"
// ActionTypeRemoveMemberPVC in scopes Normal. Removes member PVC and enforces recreate procedure
ActionTypeRemoveMemberPVC ActionType = "RemoveMemberPVC"
// ActionTypeRenewTLSCACertificate in scopes Normal. Recreate Managed CA secret
ActionTypeRenewTLSCACertificate ActionType = "RenewTLSCACertificate"
// ActionTypeRenewTLSCertificate in scopes Normal. Recreate Server TLS Certificate secret
@ -436,6 +440,8 @@ func (a ActionType) DefaultTimeout() time.Duration {
return ActionRefreshTLSKeyfileCertificateDefaultTimeout
case ActionTypeRemoveMember:
return ActionRemoveMemberDefaultTimeout
case ActionTypeRemoveMemberPVC:
return ActionRemoveMemberPVCDefaultTimeout
case ActionTypeRenewTLSCACertificate:
return ActionRenewTLSCACertificateDefaultTimeout
case ActionTypeRenewTLSCertificate:
@ -600,6 +606,8 @@ func (a ActionType) Priority() ActionPriority {
return ActionPriorityNormal
case ActionTypeRemoveMember:
return ActionPriorityNormal
case ActionTypeRemoveMemberPVC:
return ActionPriorityNormal
case ActionTypeRenewTLSCACertificate:
return ActionPriorityNormal
case ActionTypeRenewTLSCertificate:
@ -774,6 +782,8 @@ func (a ActionType) Optional() bool {
return false
case ActionTypeRemoveMember:
return false
case ActionTypeRemoveMemberPVC:
return false
case ActionTypeRenewTLSCACertificate:
return false
case ActionTypeRenewTLSCertificate:

View file

@ -271,6 +271,30 @@ func (s State) PlanServers() Servers {
return r
}
// PlanLeaderServers collects every server that the plan lists as the leader
// (the first entry) of at least one shard. Each server is reported once and
// shards without any server are ignored. The order of the result is not defined.
func (s State) PlanLeaderServers() Servers {
	leaders := make(map[Server]struct{})

	for _, collections := range s.Plan.Collections {
		for _, collection := range collections {
			for _, shardServers := range collection.Shards {
				// The first server of a shard is its leader
				if len(shardServers) > 0 {
					leaders[shardServers[0]] = struct{}{}
				}
			}
		}
	}

	out := make([]Server, 0, len(leaders))
	for server := range leaders {
		out = append(out, server)
	}

	return out
}
type CollectionShardDetails []CollectionShardDetail
type CollectionShardDetail struct {

View file

@ -0,0 +1,37 @@
//
// DISCLAIMER
//
// Copyright 2023 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
package features
// localVolumeReplacementCheck describes the "local-volume-replacement-check"
// feature. It does not require the Enterprise edition and is disabled by default.
var localVolumeReplacementCheck Feature = &feature{
	name:               "local-volume-replacement-check",
	description:        "Replace volume for local-storage if volume is unschedulable (ex. node is gone)",
	version:            "3.6.0",
	enterpriseRequired: false,
	enabledByDefault:   false,
}

// init registers the feature so it is known to the features registry.
func init() {
	registerFeature(localVolumeReplacementCheck)
}

// LocalVolumeReplacementCheck returns the local volume replacement check feature.
func LocalVolumeReplacementCheck() Feature {
	return localVolumeReplacementCheck
}

View file

@ -165,6 +165,9 @@ var (
_ Action = &actionRemoveMember{}
_ actionFactory = newRemoveMemberAction
_ Action = &actionRemoveMemberPVC{}
_ actionFactory = newRemoveMemberPVCAction
_ Action = &actionRenewTLSCACertificate{}
_ actionFactory = newRenewTLSCACertificateAction
@ -817,6 +820,18 @@ func init() {
registerAction(action, function)
}
// RemoveMemberPVC
{
// Get Action definition
function := newRemoveMemberPVCAction
action := api.ActionTypeRemoveMemberPVC
// Wrap action main function
// Register action
registerAction(action, function)
}
// RenewTLSCACertificate
{
// Get Action definition

View file

@ -490,6 +490,16 @@ func Test_Actions(t *testing.T) {
})
})
t.Run("RemoveMemberPVC", func(t *testing.T) {
ActionsExistence(t, api.ActionTypeRemoveMemberPVC)
t.Run("Internal", func(t *testing.T) {
require.False(t, api.ActionTypeRemoveMemberPVC.Internal())
})
t.Run("Optional", func(t *testing.T) {
require.False(t, api.ActionTypeRemoveMemberPVC.Optional())
})
})
t.Run("RenewTLSCACertificate", func(t *testing.T) {
ActionsExistence(t, api.ActionTypeRenewTLSCACertificate)
t.Run("Internal", func(t *testing.T) {

View file

@ -0,0 +1,108 @@
//
// DISCLAIMER
//
// Copyright 2023 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
package reconcile
import (
"context"
apiErrors "k8s.io/apimachinery/pkg/api/errors"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/deployment/agency"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/arangodb/kube-arangodb/pkg/util/globals"
)
// newRemoveMemberPVCAction is the factory for the RemoveMemberPVC action: it
// wraps the planned action and its context into an executable Action.
func newRemoveMemberPVCAction(action api.Action, actionCtx ActionContext) Action {
	return &actionRemoveMemberPVC{
		actionImpl: newActionImplDefRef(action, actionCtx),
	}
}
// actionRemoveMemberPVC implements the RemoveMemberPVC action.
type actionRemoveMemberPVC struct {
// actionImpl implements timeout and member id functions
actionImpl
// actionEmptyCheckProgress implements check progress with an empty implementation
actionEmptyCheckProgress
}
// Start executes the whole action in a single call and always reports it as
// finished (first return value is true on every path).
//
// The member's PVC is deleted only when the member is known, the "pvc"
// parameter with the expected PVC UID is present, the cluster and agency
// caches are available and the member is not a shard leader in the plan.
// The delete is guarded by a UID precondition; "not found" and "conflict"
// responses are treated as success.
func (a *actionRemoveMemberPVC) Start(ctx context.Context) (bool, error) {
	member, found := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
	if !found {
		// Member is not part of the status, nothing to do
		return true, nil
	}

	uid, found := a.action.GetParam("pvc")
	if !found {
		return true, errors.Newf("PVC UID Parameter is missing")
	}

	clusterCache, found := a.actionCtx.ACS().ClusterCache(member.ClusterID)
	if !found {
		return true, errors.Newf("Cluster is not ready")
	}

	agencyState, found := a.actionCtx.GetAgencyCache()
	if !found {
		return true, errors.Newf("Agency is not ready")
	}

	if agencyState.PlanLeaderServers().Contains(agency.Server(member.ID)) {
		return true, errors.Newf("Server is still used in cluster")
	}

	// We are safe to remove PVC
	claim := member.PersistentVolumeClaim
	if claim == nil {
		return true, nil
	}

	name := claim.GetName()
	if name == "" {
		return true, nil
	}

	deleteCtx, cancel := globals.GetGlobalTimeouts().Kubernetes().WithTimeout(ctx)
	defer cancel()

	err := clusterCache.PersistentVolumeClaimsModInterface().V1().Delete(deleteCtx, name, meta.DeleteOptions{
		Preconditions: meta.NewUIDPreconditions(uid),
	})

	switch {
	case err == nil:
		return true, nil
	case apiErrors.IsNotFound(err):
		// PVC is already gone
		return true, nil
	case apiErrors.IsConflict(err):
		// UID Changed, all fine
		return true, nil
	default:
		return true, err
	}
}

View file

@ -63,6 +63,7 @@ func (r *Reconciler) createHighPlan(ctx context.Context, apiObject k8sutil.APIOb
ApplyIfEmpty(r.createRebalancerCheckPlan).
ApplyIfEmpty(r.createMemberFailedRestoreHighPlan).
ApplyIfEmpty(r.scaleDownCandidate).
ApplyIfEmpty(r.volumeMemberReplacement).
ApplyWithBackOff(BackOffCheck, time.Minute, r.emptyPlanBuilder)).
ApplyIfEmptyWithBackOff(TimezoneCheck, time.Minute, r.createTimezoneUpdatePlan).
Apply(r.createBackupInProgressConditionPlan). // Discover backups always

View file

@ -27,11 +27,58 @@ import (
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
sharedApis "github.com/arangodb/kube-arangodb/pkg/apis/shared"
"github.com/arangodb/kube-arangodb/pkg/deployment/actions"
"github.com/arangodb/kube-arangodb/pkg/deployment/agency"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
"github.com/arangodb/kube-arangodb/pkg/deployment/reconcile/shared"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
)
// volumeMemberReplacement builds a plan that removes the PVC of the first
// member which is not scheduled, is flagged with the MemberVolumeUnschedulable
// condition, is not a shard leader in the agency plan and whose PVC can still
// be found in the cluster cache. The UID of that PVC is passed to the action
// as the "pvc" parameter.
//
// It returns nil when the LocalVolumeReplacementCheck feature is disabled,
// when the agency cache is not available or when no member qualifies.
func (r *Reconciler) volumeMemberReplacement(ctx context.Context, apiObject k8sutil.APIObject,
	spec api.DeploymentSpec, status api.DeploymentStatus,
	context PlanBuilderContext) api.Plan {
	if !features.LocalVolumeReplacementCheck().Enabled() {
		return nil
	}

	agencyState, ready := context.GetAgencyCache()
	if !ready {
		// Cache is not ready
		return nil
	}

	leaders := agencyState.PlanLeaderServers()

	for _, e := range status.Members.AsList() {
		// Only members that are not scheduled and have an unschedulable volume are candidates
		if e.Member.Conditions.IsTrue(api.ConditionTypeScheduled) ||
			!e.Member.Conditions.IsTrue(api.ConditionTypeMemberVolumeUnschedulable) {
			continue
		}

		// Members which still lead shards are left untouched
		if leaders.Contains(agency.Server(e.Member.ID)) {
			continue
		}

		claim := e.Member.PersistentVolumeClaim
		if claim == nil {
			continue
		}

		name := claim.GetName()
		if name == "" {
			continue
		}

		clusterCache, ok := context.ACS().ClusterCache(e.Member.ClusterID)
		if !ok {
			continue
		}

		current, ok := clusterCache.PersistentVolumeClaim().V1().GetSimple(name)
		if !ok {
			continue
		}

		// Server is not part of plan and is not ready
		return api.Plan{
			actions.NewAction(api.ActionTypeRemoveMemberPVC, e.Group, e.Member, "PVC is unschedulable").
				AddParam("pvc", string(current.GetUID())),
		}
	}

	return nil
}
// updateMemberConditionTypeMemberVolumeUnschedulableCondition creates plan to update the MemberVolumeUnschedulable condition of members
func (r *Reconciler) updateMemberConditionTypeMemberVolumeUnschedulableCondition(ctx context.Context, apiObject k8sutil.APIObject,
spec api.DeploymentSpec, status api.DeploymentStatus,
@ -55,6 +102,11 @@ func (r *Reconciler) updateMemberConditionTypeMemberVolumeUnschedulableCondition
unschedulable := memberConditionTypeMemberVolumeUnschedulableCalculate(cache, pv, pvc,
memberConditionTypeMemberVolumeUnschedulableLocalStorageGone)
if e.Member.Conditions.IsTrue(api.ConditionTypeScheduled) {
// We are scheduled, above checks can be ignored
unschedulable = false
}
if unschedulable == e.Member.Conditions.IsTrue(api.ConditionTypeMemberVolumeUnschedulable) {
continue
} else if unschedulable && !e.Member.Conditions.IsTrue(api.ConditionTypeMemberVolumeUnschedulable) {
@ -63,7 +115,6 @@ func (r *Reconciler) updateMemberConditionTypeMemberVolumeUnschedulableCondition
} else if !unschedulable && e.Member.Conditions.IsTrue(api.ConditionTypeMemberVolumeUnschedulable) {
plan = append(plan, shared.RemoveMemberConditionActionV2("PV Schedulable", api.ConditionTypeMemberVolumeUnschedulable, e.Group, e.Member.ID))
}
}
}
}

View file

@ -1,7 +1,7 @@
//
// DISCLAIMER
//
// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -81,6 +81,13 @@ func (r *Resources) inspectFinalizerPVCMemberExists(ctx context.Context, group a
log.Debug("Member is already failed, safe to remove member-exists finalizer")
return nil
}
if memberStatus.Conditions.IsTrue(api.ConditionTypeMemberVolumeUnschedulable) &&
!memberStatus.Conditions.IsTrue(api.ConditionTypeScheduled) {
log.Debug("Member is not scheduled and Volume is unschedulable")
return nil
}
// Inspect deployment deletion state
apiObject := r.context.GetAPIObject()
if apiObject.GetDeletionTimestamp() != nil {