1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Bugfix] Allow shards with RF1 in EnforcedResignLeadership action (#1441)

This commit is contained in:
Adam Janikowski 2023-10-15 17:29:40 +02:00 committed by GitHub
parent 83c5c83589
commit ebd0dfdd5d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 37 additions and 5 deletions

View file

@ -13,6 +13,7 @@
- (Feature) EnforcedResignLeadership action
- (Maintenance) Make scale_down_candidate annotation obsolete
- (Bugfix) Fix ResignJob ID propagation
- (Bugfix) Allow shards with RF1 in EnforcedResignLeadership action
## [1.2.33](https://github.com/arangodb/kube-arangodb/tree/1.2.33) (2023-09-27)
- (Maintenance) Bump golang.org/x/net to v0.13.0

View file

@ -238,6 +238,30 @@ func (s State) PlanLeaderServers() Servers {
return r
}
// PlanLeaderServersWithFailOver collects the distinct servers that appear as
// the first entry (the leader) of at least one planned shard whose server list
// holds more than one entry, i.e. shards where another server is available to
// fail over to. Shards with zero or one server are ignored.
//
// The result is never nil; its order follows map iteration and is therefore
// not deterministic.
func (s State) PlanLeaderServersWithFailOver() Servers {
	leaders := map[Server]struct{}{}

	for _, database := range s.Plan.Collections {
		for _, collection := range database {
			for _, servers := range collection.Shards {
				// Only shards with at least one additional server can fail over.
				if len(servers) > 1 {
					leaders[servers[0]] = struct{}{}
				}
			}
		}
	}

	result := make([]Server, 0, len(leaders))
	for leader := range leaders {
		result = append(result, leader)
	}

	return result
}
// CollectionShardDetails is a list of CollectionShardDetail entries.
type CollectionShardDetails []CollectionShardDetail
type CollectionShardDetail struct {

View file

@ -103,18 +103,18 @@ func (a *actionEnforceResignLeadership) CheckProgress(ctx context.Context) (bool
}
// Lets start resign job if required
if j, ok := a.actionCtx.Get(a.action, resignLeadershipJobID); ok && j != "" {
if j, ok := a.actionCtx.Get(a.action, resignLeadershipJobID); ok && j != "" && j != "N/A" {
_, jobStatus := agencyState.Target.GetJob(state.JobID(j))
switch jobStatus {
case state.JobPhaseFailed:
a.log.Error("Resign server job failed")
// Remove key
a.actionCtx.Add(resignLeadershipJobID, "", true)
a.actionCtx.Add(resignLeadershipJobID, "N/A", true)
return false, false, nil
case state.JobPhaseFinished:
a.log.Info("Job finished")
// Remove key
a.actionCtx.Add(resignLeadershipJobID, "", true)
a.actionCtx.Add(resignLeadershipJobID, "N/A", true)
case state.JobPhaseUnknown:
a.log.Str("status", string(jobStatus)).Error("Resign server job unknown status")
return false, false, nil
@ -122,11 +122,18 @@ func (a *actionEnforceResignLeadership) CheckProgress(ctx context.Context) (bool
return false, false, nil
}
a.actionCtx.Add(resignLeadershipJobID, "N/A", true)
// Job is Finished, check if we are not a leader anymore
if agencyState.PlanLeaderServers().Contains(state.Server(m.ID)) {
// We are still a leader!
a.log.Warn("DBServers is still a leader for shards")
return false, false, nil
if agencyState.PlanLeaderServersWithFailOver().Contains(state.Server(m.ID)) {
// We need to retry
a.log.Warn("DBServer is still a leader for shards")
return false, false, nil
}
// Nothing to do as RF is set to 1
a.log.Warn("DBServer is still a leader for shards, but ReplicationFactor is set to 1")
}
return true, false, nil
}