mirror of
https://github.com/arangodb/kube-arangodb.git
synced 2024-12-14 11:57:37 +00:00
[Feature] Add ResignLeadership to plan execution (#687)
This commit is contained in:
parent
cb1f17a264
commit
a8e1967e33
7 changed files with 164 additions and 1 deletions
|
@ -4,6 +4,7 @@
|
|||
- Add support for spec.ClusterDomain to be able to use FQDN in ArangoDB cluster communication
|
||||
- Add Version Check feature with extended Upgrade checks
|
||||
- Fix Upgrade failures recovery
|
||||
- Add ResignLeadership action before Upgrade, Restart and Shutdown actions
|
||||
|
||||
## [1.1.3](https://github.com/arangodb/kube-arangodb/tree/1.1.3) (2020-12-16)
|
||||
- Add v2alpha1 API for ArangoDeployment and ArangoDeploymentReplication
|
||||
|
|
|
@ -51,6 +51,8 @@ const (
|
|||
ActionTypeCleanOutMember ActionType = "CleanOutMember"
|
||||
// ActionTypeShutdownMember causes a member to be shutdown and removed from the cluster.
|
||||
ActionTypeShutdownMember ActionType = "ShutdownMember"
|
||||
// ActionTypeResignLeadership causes a member to resign leadership.
|
||||
ActionTypeResignLeadership ActionType = "ResignLeadership"
|
||||
// ActionTypeRotateMember causes a member to be shut down and have its pod removed.
|
||||
ActionTypeRotateMember ActionType = "RotateMember"
|
||||
// ActionTypeRotateStartMember causes a member to be shut down and have its pod removed. Does not wait for the pod to recover.
|
||||
|
|
|
@ -51,6 +51,8 @@ const (
|
|||
ActionTypeCleanOutMember ActionType = "CleanOutMember"
|
||||
// ActionTypeShutdownMember causes a member to be shutdown and removed from the cluster.
|
||||
ActionTypeShutdownMember ActionType = "ShutdownMember"
|
||||
// ActionTypeResignLeadership causes a member to resign leadership.
|
||||
ActionTypeResignLeadership ActionType = "ResignLeadership"
|
||||
// ActionTypeRotateMember causes a member to be shut down and have its pod removed.
|
||||
ActionTypeRotateMember ActionType = "RotateMember"
|
||||
// ActionTypeRotateStartMember causes a member to be shut down and have its pod removed. Does not wait for the pod to recover.
|
||||
|
|
155
pkg/deployment/reconcile/action_resign_leadership.go
Normal file
155
pkg/deployment/reconcile/action_resign_leadership.go
Normal file
|
@ -0,0 +1,155 @@
|
|||
//
|
||||
// DISCLAIMER
|
||||
//
|
||||
// Copyright 2021 ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
// Author Adam Janikowski
|
||||
//
|
||||
|
||||
package reconcile
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/arangodb/go-driver"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/arangod"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
// init hands newResignLeadershipAction to registerAction under the
// api.ActionTypeResignLeadership action type.
func init() {
|
||||
registerAction(api.ActionTypeResignLeadership, newResignLeadershipAction)
|
||||
}
|
||||
|
||||
// newResignLeadershipAction creates a new Action that implements the given
|
||||
// planned ResignLeadership action.
|
||||
func newResignLeadershipAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action {
|
||||
a := &actionResignLeadership{}
|
||||
|
||||
a.actionImpl = newActionImplDefRef(log, action, actionCtx, shutdownMemberTimeout)
|
||||
|
||||
return a
|
||||
}
|
||||
|
||||
// actionResignLeadership implements the ResignLeadership action.
// Its methods reach log, action and actionCtx through the embedded actionImpl.
|
||||
type actionResignLeadership struct {
|
||||
actionImpl
|
||||
}
|
||||
|
||||
// Start performs the start of the ReasignLeadership process on DBServer.
|
||||
func (a *actionResignLeadership) Start(ctx context.Context) (bool, error) {
|
||||
log := a.log
|
||||
group := a.action.Group
|
||||
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
|
||||
if !ok {
|
||||
log.Error().Msg("No such member")
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if a.actionCtx.GetSpec().Mode.Get() != api.DeploymentModeCluster {
|
||||
log.Debug().Msg("Resign only allowed in cluster mode")
|
||||
return true, nil
|
||||
}
|
||||
|
||||
client, err := a.actionCtx.GetDatabaseClient(ctx)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msgf("Unable to get client")
|
||||
return true, errors.WithStack(err)
|
||||
}
|
||||
|
||||
switch group {
|
||||
case api.ServerGroupDBServers:
|
||||
cluster, err := client.Cluster(ctx)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msgf("Unable to get cluster client")
|
||||
return true, errors.WithStack(err)
|
||||
}
|
||||
|
||||
var jobID string
|
||||
jobCtx := driver.WithJobIDResponse(ctx, &jobID)
|
||||
log.Debug().Msg("Temporary shutdown, resign leadership")
|
||||
if err := cluster.ResignServer(jobCtx, m.ID); err != nil {
|
||||
log.Debug().Err(err).Msg("Failed to resign server")
|
||||
return true, errors.WithStack(err)
|
||||
}
|
||||
|
||||
m.CleanoutJobID = jobID
|
||||
|
||||
if err := a.actionCtx.UpdateMember(m); err != nil {
|
||||
return true, errors.WithStack(err)
|
||||
}
|
||||
|
||||
return false, nil
|
||||
default:
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
// CheckProgress checks if Job is completed.
|
||||
func (a *actionResignLeadership) CheckProgress(ctx context.Context) (bool, bool, error) {
|
||||
log := a.log
|
||||
|
||||
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
|
||||
if !ok {
|
||||
log.Error().Msg("No such member")
|
||||
return true, false, nil
|
||||
}
|
||||
|
||||
agency, err := a.actionCtx.GetAgency(ctx)
|
||||
if err != nil {
|
||||
log.Debug().Err(err).Msg("Failed to create agency client")
|
||||
return false, false, errors.WithStack(err)
|
||||
}
|
||||
|
||||
c, err := a.actionCtx.GetDatabaseClient(ctx)
|
||||
if err != nil {
|
||||
log.Debug().Err(err).Msg("Failed to create member client")
|
||||
return false, false, errors.WithStack(err)
|
||||
}
|
||||
|
||||
jobStatus, err := arangod.CleanoutServerJobStatus(ctx, m.CleanoutJobID, c, agency)
|
||||
if err != nil {
|
||||
if driver.IsNotFound(err) {
|
||||
log.Debug().Err(err).Msg("Job not found, but proceeding")
|
||||
return true, false, nil
|
||||
}
|
||||
log.Debug().Err(err).Msg("Failed to fetch job status")
|
||||
return false, false, errors.WithStack(err)
|
||||
}
|
||||
|
||||
if jobStatus.IsFailed() {
|
||||
m.CleanoutJobID = ""
|
||||
if err := a.actionCtx.UpdateMember(m); err != nil {
|
||||
return false, false, errors.WithStack(err)
|
||||
}
|
||||
log.Error().Msg("Resign server job failed")
|
||||
return true, false, nil
|
||||
}
|
||||
|
||||
if jobStatus.IsFinished() {
|
||||
m.CleanoutJobID = ""
|
||||
if err := a.actionCtx.UpdateMember(m); err != nil {
|
||||
return false, false, errors.WithStack(err)
|
||||
}
|
||||
return true, false, nil
|
||||
}
|
||||
|
||||
return false, false, nil
|
||||
}
|
|
@ -300,6 +300,7 @@ func createRotateMemberPlan(log zerolog.Logger, member api.MemberStatus,
|
|||
Msg("Creating rotation plan")
|
||||
plan := api.Plan{
|
||||
api.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, group, member.ID, "Remove server keyfile and enforce renewal/recreation"),
|
||||
api.NewAction(api.ActionTypeResignLeadership, group, member.ID, reason),
|
||||
api.NewAction(api.ActionTypeRotateMember, group, member.ID, reason),
|
||||
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID),
|
||||
api.NewAction(api.ActionTypeWaitForMemberInSync, group, member.ID),
|
||||
|
|
|
@ -362,8 +362,9 @@ func createUpgradeMemberPlan(log zerolog.Logger, member api.MemberStatus,
|
|||
)
|
||||
}
|
||||
plan = append(plan,
|
||||
api.NewAction(api.ActionTypeResignLeadership, group, member.ID, reason),
|
||||
api.NewAction(upgradeAction, group, member.ID, reason),
|
||||
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID),
|
||||
)
|
||||
return plan
|
||||
return withMaintenance(plan...)
|
||||
}
|
||||
|
|
|
@ -720,6 +720,7 @@ func TestCreatePlan(t *testing.T) {
|
|||
},
|
||||
ExpectedPlan: []api.Action{
|
||||
api.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, api.ServerGroupAgents, "", "Remove server keyfile and enforce renewal/recreation"),
|
||||
api.NewAction(api.ActionTypeResignLeadership, api.ServerGroupAgents, ""),
|
||||
api.NewAction(api.ActionTypeRotateMember, api.ServerGroupAgents, ""),
|
||||
api.NewAction(api.ActionTypeWaitForMemberUp, api.ServerGroupAgents, ""),
|
||||
api.NewAction(api.ActionTypeWaitForMemberInSync, api.ServerGroupAgents, ""),
|
||||
|
|
Loading…
Reference in a new issue