From f86b88a8c34b63406adc2c82127078364576d62a Mon Sep 17 00:00:00 2001 From: jwierzbo Date: Thu, 18 Aug 2022 10:53:39 +0200 Subject: [PATCH] [Bugfix] Always recreate dbservers if they have a leader on it (#1083) --- CHANGELOG.md | 1 + pkg/deployment/agency/leader_test.go | 91 +++++++++++++++++++ pkg/deployment/agency/plan_collections.go | 34 +++++++ .../reconcile/plan_builder_member_recovery.go | 7 ++ 4 files changed, 133 insertions(+) create mode 100644 pkg/deployment/agency/leader_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 32df5ad9b..68f4a588a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - (QA) Member maintenance feature - (Feature) Extract Pod Details - (Feature) Add Timezone management +- (Bugfix) Always recreate DBServers if they have a leader on it. ## [1.2.15](https://github.com/arangodb/kube-arangodb/tree/1.2.15) (2022-07-20) - (Bugfix) Ensure pod names not too long diff --git a/pkg/deployment/agency/leader_test.go b/pkg/deployment/agency/leader_test.go new file mode 100644 index 000000000..12ddedcaf --- /dev/null +++ b/pkg/deployment/agency/leader_test.go @@ -0,0 +1,91 @@ +// +// DISCLAIMER +// +// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package agency + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_LeaderCheck_DBServerIsLeader(t *testing.T) { + t.Run("Empty collection", func(t *testing.T) { + p := StatePlanCollections{} + + require.False(t, p.IsDBServerLeader("A")) + }) + t.Run("Empty shards", func(t *testing.T) { + p := StatePlanCollections{ + "a": { + "a": { + Shards: nil, + }, + }, + } + + require.False(t, p.IsDBServerLeader("A")) + }) + t.Run("Empty shard list", func(t *testing.T) { + p := StatePlanCollections{ + "a": { + "a": { + Shards: Shards{ + "a": {}, + }, + }, + }, + } + + require.False(t, p.IsDBServerLeader("A")) + }) + t.Run("Follower", func(t *testing.T) { + p := StatePlanCollections{ + "a": { + "a": { + Shards: Shards{ + "a": { + "B", + "A", + }, + }, + }, + }, + } + + require.False(t, p.IsDBServerLeader("A")) + }) + t.Run("Leader", func(t *testing.T) { + p := StatePlanCollections{ + "a": { + "a": { + Shards: Shards{ + "a": { + "A", + "B", + }, + }, + }, + }, + } + + require.True(t, p.IsDBServerLeader("A")) + }) +} diff --git a/pkg/deployment/agency/plan_collections.go b/pkg/deployment/agency/plan_collections.go index c7e63e0ac..c5b84aa00 100644 --- a/pkg/deployment/agency/plan_collections.go +++ b/pkg/deployment/agency/plan_collections.go @@ -31,6 +31,15 @@ func (a StatePlanCollections) IsDBServerPresent(name Server) bool { return false } +func (a StatePlanCollections) IsDBServerLeader(name Server) bool { + for _, collections := range a { + if collections.IsDBServerLeaderInCollections(name) { + return true + } + } + return false +} + type StatePlanDBCollections map[string]StatePlanCollection func (a StatePlanDBCollections) IsDBServerInCollections(name Server) bool { @@ -42,6 +51,15 @@ func (a StatePlanDBCollections) IsDBServerInCollections(name Server) bool { return false } +func (a StatePlanDBCollections) IsDBServerLeaderInCollections(name Server) bool { + for _, collection := range a { + if collection.IsDBServerLeader(name) { + return true + } + } + return false +} + func (a StatePlanDBCollections) CountShards() int { count := 0 @@ -121,3 +139,19 @@ func (a *StatePlanCollection) IsDBServerInShards(name Server) bool { } return false } + +func (a *StatePlanCollection) IsDBServerLeader(name Server) bool { + if a == nil { + return false + } + + for _, servers := range a.Shards { + if len(servers) == 0 { + continue + } + if servers[0] == name { + return true + } + } + return false +} diff --git a/pkg/deployment/reconcile/plan_builder_member_recovery.go b/pkg/deployment/reconcile/plan_builder_member_recovery.go index a77670a46..16e27b931 100644 --- a/pkg/deployment/reconcile/plan_builder_member_recovery.go +++ b/pkg/deployment/reconcile/plan_builder_member_recovery.go @@ -80,6 +80,13 @@ func (r *Reconciler) createMemberFailedRestoreInternal(_ context.Context, _ k8su continue } + if agencyState.Plan.Collections.IsDBServerLeader(agency.Server(m.ID)) { + memberLog.Info("Recreating leader DBServer - it cannot be removed gracefully") + plan = append(plan, actions.NewAction(api.ActionTypeRecreateMember, group, m)) + + continue + } + if c := spec.DBServers.GetCount(); c <= len(members)-failed { // There are more or equal alive members than current count. A member should not be recreated. continue