1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Bugfix] Wait for Pod to be Ready in post-restart actions (#1206)

This commit is contained in:
Adam Janikowski 2022-12-08 18:53:40 +01:00 committed by GitHub
parent 9f4d3975b9
commit 463ab90b02
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 130 additions and 20 deletions

View file

@ -36,8 +36,8 @@
- (Feature) Switch services to Port names
- (Feature) Configurable ArangoD Port
- (Feature) Allow to exclude metrics
- (Feature) Do not restart member if all pods in group are not ready
- (Bugfix) Do not stop Sync if Synchronization is in progress
- (Bugfix) Do not stop Sync if Synchronization is in progress
- (Bugfix) Wait for Pod to be Ready in post-restart actions
## [1.2.20](https://github.com/arangodb/kube-arangodb/tree/1.2.20) (2022-10-25)
- (Feature) Add action progress

View file

@ -77,6 +77,7 @@
| UpdateTLSSNI | no | 10m0s | Enterprise Only | Update certificate in SNI |
| UpgradeMember | no | 6h0m0s | Community & Enterprise | Run the Upgrade procedure on member |
| WaitForMemberInSync | no | 30m0s | Community & Enterprise | Wait for member to be in sync. In case of DBServer waits for shards. In case of Agents to catch-up on Agency index |
| WaitForMemberReady | no | 30m0s | Community & Enterprise | Wait for member Ready condition |
| WaitForMemberUp | no | 30m0s | Community & Enterprise | Wait for member to be responsive |
@ -159,6 +160,7 @@ spec:
UpdateTLSSNI: 10m0s
UpgradeMember: 6h0m0s
WaitForMemberInSync: 30m0s
WaitForMemberReady: 30m0s
WaitForMemberUp: 30m0s
```

View file

@ -44,6 +44,9 @@ actions:
UpgradeMember:
description: Run the Upgrade procedure on member
timeout: 6h
WaitForMemberReady:
description: Wait for member Ready condition
timeout: 30m
WaitForMemberUp:
description: Wait for member to be responsive
timeout: 30m

View file

@ -171,6 +171,8 @@ const (
ActionUpgradeMemberDefaultTimeout time.Duration = 21600 * time.Second // 6h0m0s
// ActionWaitForMemberInSyncDefaultTimeout define default timeout for action ActionWaitForMemberInSync
ActionWaitForMemberInSyncDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
// ActionWaitForMemberReadyDefaultTimeout define default timeout for action ActionWaitForMemberReady
ActionWaitForMemberReadyDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
// ActionWaitForMemberUpDefaultTimeout define default timeout for action ActionWaitForMemberUp
ActionWaitForMemberUpDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
@ -322,6 +324,8 @@ const (
ActionTypeUpgradeMember ActionType = "UpgradeMember"
// ActionTypeWaitForMemberInSync in scopes Normal. Wait for member to be in sync. In case of DBServer waits for shards. In case of Agents to catch-up on Agency index
ActionTypeWaitForMemberInSync ActionType = "WaitForMemberInSync"
// ActionTypeWaitForMemberReady in scopes Normal. Wait for member Ready condition
ActionTypeWaitForMemberReady ActionType = "WaitForMemberReady"
// ActionTypeWaitForMemberUp in scopes Normal. Wait for member to be responsive
ActionTypeWaitForMemberUp ActionType = "WaitForMemberUp"
)
@ -474,6 +478,8 @@ func (a ActionType) DefaultTimeout() time.Duration {
return ActionUpgradeMemberDefaultTimeout
case ActionTypeWaitForMemberInSync:
return ActionWaitForMemberInSyncDefaultTimeout
case ActionTypeWaitForMemberReady:
return ActionWaitForMemberReadyDefaultTimeout
case ActionTypeWaitForMemberUp:
return ActionWaitForMemberUpDefaultTimeout
default:
@ -630,6 +636,8 @@ func (a ActionType) Priority() ActionPriority {
return ActionPriorityNormal
case ActionTypeWaitForMemberInSync:
return ActionPriorityNormal
case ActionTypeWaitForMemberReady:
return ActionPriorityNormal
case ActionTypeWaitForMemberUp:
return ActionPriorityNormal
default:

View file

@ -171,6 +171,8 @@ const (
ActionUpgradeMemberDefaultTimeout time.Duration = 21600 * time.Second // 6h0m0s
// ActionWaitForMemberInSyncDefaultTimeout define default timeout for action ActionWaitForMemberInSync
ActionWaitForMemberInSyncDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
// ActionWaitForMemberReadyDefaultTimeout define default timeout for action ActionWaitForMemberReady
ActionWaitForMemberReadyDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
// ActionWaitForMemberUpDefaultTimeout define default timeout for action ActionWaitForMemberUp
ActionWaitForMemberUpDefaultTimeout time.Duration = 1800 * time.Second // 30m0s
@ -322,6 +324,8 @@ const (
ActionTypeUpgradeMember ActionType = "UpgradeMember"
// ActionTypeWaitForMemberInSync in scopes Normal. Wait for member to be in sync. In case of DBServer waits for shards. In case of Agents to catch-up on Agency index
ActionTypeWaitForMemberInSync ActionType = "WaitForMemberInSync"
// ActionTypeWaitForMemberReady in scopes Normal. Wait for member Ready condition
ActionTypeWaitForMemberReady ActionType = "WaitForMemberReady"
// ActionTypeWaitForMemberUp in scopes Normal. Wait for member to be responsive
ActionTypeWaitForMemberUp ActionType = "WaitForMemberUp"
)
@ -474,6 +478,8 @@ func (a ActionType) DefaultTimeout() time.Duration {
return ActionUpgradeMemberDefaultTimeout
case ActionTypeWaitForMemberInSync:
return ActionWaitForMemberInSyncDefaultTimeout
case ActionTypeWaitForMemberReady:
return ActionWaitForMemberReadyDefaultTimeout
case ActionTypeWaitForMemberUp:
return ActionWaitForMemberUpDefaultTimeout
default:
@ -630,6 +636,8 @@ func (a ActionType) Priority() ActionPriority {
return ActionPriorityNormal
case ActionTypeWaitForMemberInSync:
return ActionPriorityNormal
case ActionTypeWaitForMemberReady:
return ActionPriorityNormal
case ActionTypeWaitForMemberUp:
return ActionPriorityNormal
default:

View file

@ -246,6 +246,9 @@ var (
_ Action = &actionWaitForMemberInSync{}
_ actionFactory = newWaitForMemberInSyncAction
_ Action = &actionWaitForMemberReady{}
_ actionFactory = newWaitForMemberReadyAction
_ Action = &actionWaitForMemberUp{}
_ actionFactory = newWaitForMemberUpAction
)
@ -1132,6 +1135,18 @@ func init() {
registerAction(action, function)
}
// WaitForMemberReady
{
// Get Action definition
function := newWaitForMemberReadyAction
action := api.ActionTypeWaitForMemberReady
// Wrap action main function
// Register action
registerAction(action, function)
}
// WaitForMemberUp
{
// Get Action definition

View file

@ -544,6 +544,13 @@ func Test_Actions(t *testing.T) {
})
})
t.Run("WaitForMemberReady", func(t *testing.T) {
ActionsExistence(t, api.ActionTypeWaitForMemberReady)
t.Run("Internal", func(t *testing.T) {
require.False(t, api.ActionTypeWaitForMemberReady.Internal())
})
})
t.Run("WaitForMemberUp", func(t *testing.T) {
ActionsExistence(t, api.ActionTypeWaitForMemberUp)
t.Run("Internal", func(t *testing.T) {

View file

@ -24,7 +24,6 @@ import (
"context"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/deployment/actions"
"github.com/arangodb/kube-arangodb/pkg/deployment/topology"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
)
@ -68,9 +67,5 @@ func (a *actionAddMember) Start(ctx context.Context) (bool, error) {
// ActionPlanAppender appends the post-creation wait actions to the plan.
//
// It returns the extended plan and true to signal that the plan was changed.
func (a *actionAddMember) ActionPlanAppender(current api.Plan) (api.Plan, bool) {
	// The waits must target the member created by this action, which is tracked
	// in a.newMemberID; a.action.MemberID belongs to the AddMember action itself
	// and is presumably not the ID of the new member - confirm against Start.
	return withWaitForMember(current, a.action.Group, withPredefinedMember(a.newMemberID)), true
}

View file

@ -0,0 +1,67 @@
//
// DISCLAIMER
//
// Copyright 2022 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
package reconcile
import (
"context"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
)
// newWaitForMemberReadyAction is the factory for the WaitForMemberReady
// action: it wraps the planned action and its context into an Action.
func newWaitForMemberReadyAction(action api.Action, actionCtx ActionContext) Action {
	return &actionWaitForMemberReady{
		actionImpl: newActionImplDefRef(action, actionCtx),
	}
}
// actionWaitForMemberReady implements the WaitForMemberReady action.
// It carries no state of its own beyond the embedded actionImpl.
type actionWaitForMemberReady struct {
// actionImpl implements the timeout and member ID functions
actionImpl
}
// Start begins the action by running a single progress check.
// It returns true when the action is already finished, and false when the
// start time must be recorded and the ready condition polled later.
// An error from the progress check is returned wrapped with a stack trace.
func (a *actionWaitForMemberReady) Start(ctx context.Context) (bool, error) {
	done, _, err := a.CheckProgress(ctx)
	if err == nil {
		return done, nil
	}

	return false, errors.WithStack(err)
}
// CheckProgress checks the progress of the action.
//
// The first result is true when the action is finished: the member could not
// be looked up, the member is in the failed phase, or its Ready condition is
// true. The second result is always false here and the error is always nil.
func (a *actionWaitForMemberReady) CheckProgress(ctx context.Context) (bool, bool, error) {
	member, ok := a.actionCtx.GetMemberStatusByID(a.MemberID())
	if !ok {
		// The lookup failed, so there is nothing to wait for. Log this case
		// separately so it is not mistaken for a failed member.
		a.log.Debug("Member not found")
		return true, false, nil
	}

	if member.Phase == api.MemberPhaseFailed {
		// A failed member is not waited on; the action is treated as done.
		a.log.Debug("Member in failed phase")
		return true, false, nil
	}

	return member.Conditions.IsTrue(api.ConditionTypeReady), false, nil
}

View file

@ -199,9 +199,7 @@ func (r *Reconciler) createUpdatePlanInternal(apiObject k8sutil.APIObject, spec
AddParam(api.ConditionTypePendingUpdate.String(), "").
AddParam(api.ConditionTypeUpdating.String(), "T")}, false
} else {
p = p.After(
actions.NewAction(api.ActionTypeWaitForMemberUp, m.Group, m.Member),
actions.NewAction(api.ActionTypeWaitForMemberInSync, m.Group, m.Member))
p = withWaitForMember(p, m.Group, m.Member)
p = p.Wrap(actions.NewAction(api.ActionTypeSetMemberCondition, m.Group, m.Member, reason).
AddParam(api.ConditionTypePendingUpdate.String(), "").AddParam(api.ConditionTypeUpdating.String(), "T"),
@ -458,9 +456,6 @@ func groupReadyForRestart(context PlanBuilderContext, status api.DeploymentStatu
return false, "Not all members are serving"
}
if !status.Members.MembersOfGroup(group).AllMembersReady() {
return false, "Not all members are ready"
}
switch group {
case api.ServerGroupDBServers:
agencyState, ok := context.GetAgencyCache()
@ -535,3 +530,15 @@ func skipResignLeadership(mode api.DeploymentMode, v driver.Version) bool {
return mode == api.DeploymentModeCluster && features.Maintenance().Enabled() && ((v.CompareTo("3.6.0") >= 0 && v.CompareTo("3.6.14") <= 0) ||
(v.CompareTo("3.7.0") >= 0 && v.CompareTo("3.7.12") <= 0))
}
// withWaitForMember returns plan extended with the standard wait actions
// for the given member of the given group.
func withWaitForMember(plan api.Plan, group api.ServerGroup, member api.MemberStatus) api.Plan {
	waits := waitForMemberActions(group, member)
	return append(plan, waits...)
}
// waitForMemberActions builds the ordered wait sequence for a member:
// WaitForMemberUp, then WaitForMemberReady, then WaitForMemberInSync.
func waitForMemberActions(group api.ServerGroup, member api.MemberStatus) api.Plan {
	up := actions.NewAction(api.ActionTypeWaitForMemberUp, group, member, "Wait for member to be up after creation")
	ready := actions.NewAction(api.ActionTypeWaitForMemberReady, group, member, "Wait for member pod to be ready after creation")
	inSync := actions.NewAction(api.ActionTypeWaitForMemberInSync, group, member, "Wait for member to be in sync after creation")

	return api.Plan{up, ready, inSync}
}

View file

@ -137,16 +137,14 @@ func (r *Reconciler) pvcResizePlan(group api.ServerGroup, member api.MemberStatu
actions.NewAction(api.ActionTypePVCResize, group, member),
}
case api.PVCResizeModeRotate:
return api.Plan{
return withWaitForMember(api.Plan{
actions.NewAction(api.ActionTypeResignLeadership, group, member),
actions.NewAction(api.ActionTypeKillMemberPod, group, member),
actions.NewAction(api.ActionTypeRotateStartMember, group, member),
actions.NewAction(api.ActionTypePVCResize, group, member),
actions.NewAction(api.ActionTypePVCResized, group, member),
actions.NewAction(api.ActionTypeRotateStopMember, group, member),
actions.NewAction(api.ActionTypeWaitForMemberUp, group, member),
actions.NewAction(api.ActionTypeWaitForMemberInSync, group, member),
}
}, group, member)
default:
r.planLogger.Str("server-group", group.AsRole()).Str("mode", mode.String()).
Error("Requested mode is not supported")

View file

@ -55,10 +55,10 @@ func createRotateMemberPlanWithAction(member api.MemberStatus,
plan = plan.After(
actions.NewAction(api.ActionTypeKillMemberPod, group, member, reason),
actions.NewAction(action, group, member, reason),
actions.NewAction(api.ActionTypeWaitForMemberUp, group, member),
actions.NewAction(api.ActionTypeWaitForMemberInSync, group, member),
)
plan = withWaitForMember(plan, group, member)
plan = withMemberMaintenance(group, member, "Enable member maintenance", plan)
return plan