2020-01-16 12:36:28 +00:00
//
// DISCLAIMER
//
2023-01-27 14:13:31 +00:00
// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany
2020-01-16 12:36:28 +00:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
package reconcile
import (
2020-06-08 11:30:32 +00:00
"context"
2022-03-18 23:49:20 +00:00
"fmt"
2022-02-16 00:36:45 +00:00
"github.com/arangodb/go-driver"
2020-01-16 12:36:28 +00:00
upgraderules "github.com/arangodb/go-upgrade-rules"
2022-07-11 11:49:47 +00:00
2020-04-08 10:32:24 +00:00
"github.com/arangodb/kube-arangodb/pkg/apis/deployment"
2020-01-16 12:36:28 +00:00
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
2022-02-16 13:29:24 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/actions"
2023-06-08 16:26:37 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/agency/state"
2022-07-11 11:49:47 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
2023-12-12 15:39:35 +00:00
sharedReconcile "github.com/arangodb/kube-arangodb/pkg/deployment/reconcile/shared"
2022-07-11 11:49:47 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
2022-04-21 08:36:45 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/rotation"
2023-07-20 19:15:51 +00:00
"github.com/arangodb/kube-arangodb/pkg/util"
2023-11-13 19:18:58 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/compare"
2020-01-16 12:36:28 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
)
2020-07-30 13:28:30 +00:00
var (
// rotationByAnnotationOrder - Change order of execution - Coordinators and Agents should be executed before DBServer to save time
rotationByAnnotationOrder = [ ] api . ServerGroup {
api . ServerGroupAgents ,
2020-08-19 15:12:12 +00:00
api . ServerGroupSingle ,
2020-07-30 13:28:30 +00:00
api . ServerGroupCoordinators ,
api . ServerGroupDBServers ,
api . ServerGroupSyncMasters ,
api . ServerGroupSyncWorkers ,
}
)
2020-06-08 11:30:32 +00:00
2021-08-20 13:02:36 +00:00
// upgradeDecision describes the outcome of comparing the image a member is
// running with the image requested by the deployment spec.
type upgradeDecision struct {
	// FromVersion is the ArangoDB version of the member's image.
	FromVersion driver.Version
	// FromLicense is the license (community/enterprise) of the member's image.
	FromLicense upgraderules.License
	// ToVersion is the ArangoDB version of the image requested by the spec.
	ToVersion driver.Version
	// ToLicense is the license (community/enterprise) of the requested image.
	ToLicense upgraderules.License
	// UpgradeNeeded is set when the image version has changed.
	UpgradeNeeded bool
	// UpgradeAllowed is set when the version change is an allowed one.
	UpgradeAllowed bool
	// AutoUpgradeNeeded is set when the database must be started with
	// `--database.auto-upgrade` once.
	AutoUpgradeNeeded bool
	// Hold is set when the info of the requested image is not known, so no
	// decision can be taken yet.
	Hold bool
}
2020-07-30 13:28:30 +00:00
// createRotateOrUpgradePlan goes over all pods to check if an upgrade or rotate is needed.
2022-06-14 07:26:07 +00:00
func ( r * Reconciler ) createRotateOrUpgradePlan ( ctx context . Context , apiObject k8sutil . APIObject ,
2020-06-08 11:30:32 +00:00
spec api . DeploymentSpec , status api . DeploymentStatus ,
2022-05-15 16:11:41 +00:00
context PlanBuilderContext ) api . Plan {
2020-06-08 11:30:32 +00:00
var plan api . Plan
2022-06-14 07:26:07 +00:00
newPlan , idle := r . createRotateOrUpgradePlanInternal ( apiObject , spec , status , context )
2020-06-08 11:30:32 +00:00
if idle {
plan = append ( plan ,
2022-02-16 13:29:24 +00:00
actions . NewClusterAction ( api . ActionTypeIdle ) )
2020-06-08 11:30:32 +00:00
} else {
plan = append ( plan , newPlan ... )
}
return plan
}
2022-06-14 07:26:07 +00:00
func ( r * Reconciler ) createMarkToRemovePlan ( ctx context . Context , apiObject k8sutil . APIObject ,
2022-01-12 12:44:33 +00:00
spec api . DeploymentSpec , status api . DeploymentStatus ,
2022-05-15 16:11:41 +00:00
context PlanBuilderContext ) api . Plan {
2022-01-12 12:44:33 +00:00
var plan api . Plan
2020-07-30 13:28:30 +00:00
2022-07-24 18:26:26 +00:00
for _ , e := range status . Members . AsListInGroups ( rotationByAnnotationOrder ... ) {
m := e . Member
group := e . Group
2022-08-08 10:03:06 +00:00
if m . Phase != api . MemberPhaseCreated || m . Pod . GetName ( ) == "" {
2022-07-24 18:26:26 +00:00
// Only rotate when phase is created
continue
}
2020-07-30 13:28:30 +00:00
2022-07-24 18:26:26 +00:00
cache , ok := context . ACS ( ) . ClusterCache ( m . ClusterID )
if ! ok {
continue
}
2022-05-15 16:11:41 +00:00
2022-08-08 10:03:06 +00:00
pod , found := cache . Pod ( ) . V1 ( ) . GetSimple ( m . Pod . GetName ( ) )
2022-07-24 18:26:26 +00:00
if ! found {
continue
}
2020-03-11 07:57:03 +00:00
2022-07-24 18:26:26 +00:00
if pod . Annotations != nil {
if _ , ok := pod . Annotations [ deployment . ArangoDeploymentPodReplaceAnnotation ] ; ok && ( group == api . ServerGroupDBServers || group == api . ServerGroupAgents || group == api . ServerGroupCoordinators ) {
if ! m . Conditions . IsTrue ( api . ConditionTypeMarkedToRemove ) {
plan = append ( plan , actions . NewAction ( api . ActionTypeMarkToRemoveMember , group , m , "Replace flag present" ) )
continue
2020-10-29 12:52:13 +00:00
}
2020-03-11 07:57:03 +00:00
}
2020-01-16 12:36:28 +00:00
}
2022-07-24 18:26:26 +00:00
}
2022-01-12 12:44:33 +00:00
return plan
}
2022-06-14 07:26:07 +00:00
func ( r * Reconciler ) createRotateOrUpgradePlanInternal ( apiObject k8sutil . APIObject , spec api . DeploymentSpec , status api . DeploymentStatus , context PlanBuilderContext ) ( api . Plan , bool ) {
decision := r . createRotateOrUpgradeDecision ( spec , status , context )
2022-01-12 12:44:33 +00:00
2023-07-20 19:15:51 +00:00
agencyCache , ok := context . GetAgencyCache ( )
if ! ok {
// Unable to get agency state, do not restart
return nil , true
}
2022-01-14 09:58:49 +00:00
if decision . IsUpgrade ( ) {
2023-07-20 19:15:51 +00:00
return r . createUpgradePlanInternalCondition ( apiObject , spec , status , context , decision , agencyCache )
2022-04-21 08:36:45 +00:00
} else if decision . IsUpdate ( ) {
2023-07-20 19:15:51 +00:00
return r . createUpdatePlanInternalCondition ( apiObject , spec , status , decision , context , agencyCache )
2022-04-21 08:36:45 +00:00
} else {
upgradeCondition := status . Conditions . IsTrue ( api . ConditionTypeUpgradeInProgress )
updateCondition := status . Conditions . IsTrue ( api . ConditionTypeUpdateInProgress )
2022-02-16 00:36:45 +00:00
2022-04-21 08:36:45 +00:00
if upgradeCondition || updateCondition {
p := make ( api . Plan , 0 , 2 )
2022-02-16 00:36:45 +00:00
2022-04-21 08:36:45 +00:00
if upgradeCondition {
2023-12-12 15:39:35 +00:00
p = append ( p , sharedReconcile . RemoveConditionActionV2 ( "Upgrade done" , api . ConditionTypeUpgradeInProgress ) )
2022-02-16 00:36:45 +00:00
}
2022-04-21 08:36:45 +00:00
if updateCondition {
2023-12-12 15:39:35 +00:00
p = append ( p , sharedReconcile . RemoveConditionActionV2 ( "Update done" , api . ConditionTypeUpdateInProgress ) )
2022-02-16 00:36:45 +00:00
}
2022-04-21 08:36:45 +00:00
return p , false
2022-02-16 00:36:45 +00:00
}
2022-04-21 08:36:45 +00:00
}
2022-02-16 00:36:45 +00:00
2022-04-21 08:36:45 +00:00
return nil , false
}
2021-07-15 12:07:33 +00:00
2023-07-20 19:15:51 +00:00
func ( r * Reconciler ) createUpdatePlanInternalCondition ( apiObject k8sutil . APIObject , spec api . DeploymentSpec , status api . DeploymentStatus , decision updateUpgradeDecisionMap , context PlanBuilderContext , agencyCache state . State ) ( api . Plan , bool ) {
plan , idle := r . createUpdatePlanInternal ( apiObject , spec , status , decision , context , agencyCache )
2022-01-12 12:44:33 +00:00
2022-04-21 08:36:45 +00:00
if idle || len ( plan ) > 0 {
if ! status . Conditions . IsTrue ( api . ConditionTypeUpdateInProgress ) {
plan = append ( api . Plan {
2023-12-12 15:39:35 +00:00
sharedReconcile . UpdateConditionActionV2 ( "Update in progress" , api . ConditionTypeUpdateInProgress , true , "" , "" , "" ) ,
2022-04-21 08:36:45 +00:00
} , plan ... )
}
}
2022-01-12 12:44:33 +00:00
2022-04-21 08:36:45 +00:00
return plan , idle
}
2022-01-12 12:44:33 +00:00
2023-07-20 19:15:51 +00:00
func ( r * Reconciler ) createUpdatePlanInternal ( apiObject k8sutil . APIObject , spec api . DeploymentSpec , status api . DeploymentStatus , decision updateUpgradeDecisionMap , context PlanBuilderContext , agencyCache state . State ) ( api . Plan , bool ) {
2022-04-21 08:36:45 +00:00
// Update phase
2023-07-20 19:15:51 +00:00
2022-04-21 08:36:45 +00:00
for _ , m := range status . Members . AsList ( ) {
d := decision [ m . Member . ID ]
if ! d . update {
continue
2022-01-12 12:44:33 +00:00
}
2020-01-16 12:36:28 +00:00
2022-04-21 08:36:45 +00:00
if ! d . updateAllowed {
// Update is not allowed due to constraint
if ! d . unsafeUpdateAllowed {
2022-06-14 07:26:07 +00:00
r . planLogger . Str ( "member" , m . Member . ID ) . Str ( "Reason" , d . updateMessage ) . Info ( "Pod needs restart but cluster is not ready. Either some shards are not in sync or some member is not ready." )
2022-01-14 09:58:49 +00:00
continue
}
2022-06-14 07:26:07 +00:00
r . planLogger . Str ( "member" , m . Member . ID ) . Str ( "Reason" , d . updateMessage ) . Info ( "Pod needs restart but cluster is not ready. Either some shards are not in sync or some member is not ready, but unsafe upgrade is allowed" )
2022-04-21 08:36:45 +00:00
}
2022-01-12 12:44:33 +00:00
2022-04-21 08:36:45 +00:00
if m . Member . Conditions . IsTrue ( api . ConditionTypeRestart ) {
2023-07-20 19:15:51 +00:00
return r . createRotateMemberPlan ( m . Member , m . Group , spec , "Restart flag present" , util . CheckConditionalP1Nil ( agencyCache . GetRebootID , driver . ServerID ( m . Member . ID ) ) ) , false
2022-04-21 08:36:45 +00:00
}
2023-04-04 10:41:45 +00:00
2022-05-15 16:11:41 +00:00
arangoMember , ok := context . ACS ( ) . CurrentClusterCache ( ) . ArangoMember ( ) . V1 ( ) . GetSimple ( m . Member . ArangoMemberName ( apiObject . GetName ( ) , m . Group ) )
if ! ok {
continue
}
cache , ok := context . ACS ( ) . ClusterCache ( m . Member . ClusterID )
2022-04-21 08:36:45 +00:00
if ! ok {
continue
}
2022-01-12 12:44:33 +00:00
2022-08-08 10:03:06 +00:00
p , ok := cache . Pod ( ) . V1 ( ) . GetSimple ( m . Member . Pod . GetName ( ) )
2022-04-21 08:36:45 +00:00
if ! ok {
p = nil
}
2022-01-12 12:44:33 +00:00
2023-04-04 10:41:45 +00:00
if svc , ok := cache . Service ( ) . V1 ( ) . GetSimple ( arangoMember . GetName ( ) ) ; ok {
if k8sutil . IsServiceRotationRequired ( spec , svc ) {
2023-08-18 07:51:51 +00:00
return api . Plan {
2023-12-12 15:39:35 +00:00
sharedReconcile . RemoveMemberConditionActionV2 ( "Cleaning update" , api . ConditionTypePendingUpdate , m . Group , m . Member . ID ) ,
sharedReconcile . UpdateMemberConditionActionV2 ( "Cleaning update" , api . ConditionTypeUpdating , m . Group , m . Member . ID , true , "Cleaning update" , "" , "" ) ,
2023-08-18 07:51:51 +00:00
} , false
2023-04-04 10:41:45 +00:00
}
}
2022-12-08 19:26:58 +00:00
if mode , p , checksum , reason , err := rotation . IsRotationRequired ( context . ACS ( ) , spec , m . Member , m . Group , p , arangoMember . Spec . Template , arangoMember . Status . Template ) ; err != nil {
2022-06-14 07:26:07 +00:00
r . planLogger . Err ( err ) . Str ( "member" , m . Member . ID ) . Error ( "Error while generating update plan" )
2022-04-21 08:36:45 +00:00
continue
2023-11-13 19:18:58 +00:00
} else if mode != compare . InPlaceRotation {
2023-08-18 07:51:51 +00:00
return api . Plan {
2023-12-12 15:39:35 +00:00
sharedReconcile . RemoveMemberConditionActionV2 ( reason , api . ConditionTypePendingUpdate , m . Group , m . Member . ID ) ,
sharedReconcile . UpdateMemberConditionActionV2 ( reason , api . ConditionTypeUpdating , m . Group , m . Member . ID , true , reason , "" , "" ) ,
2023-08-18 07:51:51 +00:00
} , false
2022-04-21 08:36:45 +00:00
} else {
2022-12-08 17:53:40 +00:00
p = withWaitForMember ( p , m . Group , m . Member )
2022-04-21 08:36:45 +00:00
2022-12-08 19:26:58 +00:00
p = append ( p , actions . NewAction ( api . ActionTypeArangoMemberUpdatePodStatus , m . Group , m . Member , "Propagating status of pod" ) . AddParam ( ActionTypeArangoMemberUpdatePodStatusChecksum , checksum ) )
2023-09-21 06:19:19 +00:00
p = p . WrapWithPlan ( api . Plan {
2023-12-12 15:39:35 +00:00
sharedReconcile . RemoveMemberConditionActionV2 ( reason , api . ConditionTypePendingUpdate , m . Group , m . Member . ID ) ,
sharedReconcile . UpdateMemberConditionActionV2 ( reason , api . ConditionTypeUpdating , m . Group , m . Member . ID , true , reason , "" , "" ) ,
2023-08-18 07:51:51 +00:00
} , api . Plan {
2023-12-12 15:39:35 +00:00
sharedReconcile . RemoveMemberConditionActionV2 ( reason , api . ConditionTypeUpdating , m . Group , m . Member . ID ) ,
2023-08-18 07:51:51 +00:00
} )
2022-04-21 08:36:45 +00:00
return p , false
}
}
return nil , true
}
2022-01-12 12:44:33 +00:00
2023-07-20 19:15:51 +00:00
func ( r * Reconciler ) createUpgradePlanInternalCondition ( apiObject k8sutil . APIObject , spec api . DeploymentSpec , status api . DeploymentStatus , context PlanBuilderContext , decision updateUpgradeDecisionMap , agencyCache state . State ) ( api . Plan , bool ) {
plan , idle := r . createUpgradePlanInternal ( apiObject , spec , status , context , decision , agencyCache )
2022-04-21 08:36:45 +00:00
if idle || len ( plan ) > 0 {
if ! status . Conditions . IsTrue ( api . ConditionTypeUpgradeInProgress ) {
plan = append ( api . Plan {
2023-12-12 15:39:35 +00:00
sharedReconcile . UpdateConditionActionV2 ( "Upgrade in progress" , api . ConditionTypeUpgradeInProgress , true , "" , "" , "" ) ,
2022-04-21 08:36:45 +00:00
} , plan ... )
}
}
return plan , idle
}
2023-07-20 19:15:51 +00:00
func ( r * Reconciler ) createUpgradePlanInternal ( apiObject k8sutil . APIObject , spec api . DeploymentSpec , status api . DeploymentStatus , context PlanBuilderContext , decision updateUpgradeDecisionMap , agencyCache state . State ) ( api . Plan , bool ) {
2022-04-21 08:36:45 +00:00
for _ , m := range status . Members . AsList ( ) {
// Pre-check
d := decision [ m . Member . ID ]
if ! d . upgrade {
continue
}
// We have member to upgrade
if d . upgradeDecision . Hold {
// Holding upgrade
continue
}
if ! d . upgradeDecision . UpgradeAllowed {
context . CreateEvent ( k8sutil . NewUpgradeNotAllowedEvent ( apiObject , d . upgradeDecision . FromVersion , d . upgradeDecision . ToVersion , d . upgradeDecision . FromLicense , d . upgradeDecision . ToLicense ) )
return nil , false
}
}
// Upgrade phase
// During upgrade always get first member which needs to be upgraded
for _ , m := range status . Members . AsList ( ) {
d := decision [ m . Member . ID ]
if ! d . upgrade {
continue
}
// We have member to upgrade
if d . upgradeDecision . Hold {
// Holding upgrade
return nil , false
}
if ! d . upgradeDecision . UpgradeNeeded {
// In upgrade scenario but upgrade is not needed
return nil , false
}
if ! d . upgradeDecision . UpgradeAllowed {
context . CreateEvent ( k8sutil . NewUpgradeNotAllowedEvent ( apiObject , d . upgradeDecision . FromVersion , d . upgradeDecision . ToVersion , d . upgradeDecision . FromLicense , d . upgradeDecision . ToLicense ) )
return nil , false
}
if d . updateAllowed {
// We are fine, group is alive so we can proceed
2022-06-14 07:26:07 +00:00
r . planLogger . Str ( "member" , m . Member . ID ) . Str ( "Reason" , d . updateMessage ) . Info ( "Upgrade allowed" )
2023-07-20 19:15:51 +00:00
return r . createUpgradeMemberPlan ( m . Member , m . Group , "Version upgrade" , spec , status , ! d . upgradeDecision . AutoUpgradeNeeded , agencyCache ) , false
2022-04-21 08:36:45 +00:00
} else if d . unsafeUpdateAllowed {
2022-06-14 07:26:07 +00:00
r . planLogger . Str ( "member" , m . Member . ID ) . Str ( "Reason" , d . updateMessage ) . Info ( "Pod needs upgrade but cluster is not ready. Either some shards are not in sync or some member is not ready, but unsafe upgrade is allowed" )
2023-07-20 19:15:51 +00:00
return r . createUpgradeMemberPlan ( m . Member , m . Group , "Version upgrade" , spec , status , ! d . upgradeDecision . AutoUpgradeNeeded , agencyCache ) , false
2022-04-21 08:36:45 +00:00
} else {
2022-06-14 07:26:07 +00:00
r . planLogger . Str ( "member" , m . Member . ID ) . Str ( "Reason" , d . updateMessage ) . Info ( "Pod needs upgrade but cluster is not ready. Either some shards are not in sync or some member is not ready." )
2022-04-21 08:36:45 +00:00
return nil , true
2022-01-12 12:44:33 +00:00
}
}
2022-06-14 07:26:07 +00:00
r . planLogger . Warn ( "Pod upgrade plan has been made, but it has been dropped due to missing flag" )
2022-01-14 09:58:49 +00:00
return nil , false
2022-01-12 12:44:33 +00:00
}
2020-01-16 12:36:28 +00:00
// podNeedsUpgrading decides if an upgrade of the pod is needed (to comply with
// the given spec) and if that is allowed.
2022-06-14 07:26:07 +00:00
func ( r * Reconciler ) podNeedsUpgrading ( status api . MemberStatus , spec api . DeploymentSpec , images api . ImageInfoList ) upgradeDecision {
2020-11-23 13:19:50 +00:00
currentImage , found := currentImageInfo ( spec , images )
if ! found {
// Hold rotation tasks - we do not know image
return upgradeDecision { Hold : true }
}
memberImage , found := memberImageInfo ( spec , status , images )
if ! found {
// Member info not found
return upgradeDecision { UpgradeNeeded : false }
}
if currentImage . Image == memberImage . Image {
// No change
return upgradeDecision { UpgradeNeeded : false }
}
// Image changed, check if change is allowed
specVersion := currentImage . ArangoDBVersion
memberVersion := memberImage . ArangoDBVersion
asLicense := func ( info api . ImageInfo ) upgraderules . License {
if info . Enterprise {
return upgraderules . LicenseEnterprise
2020-01-16 12:36:28 +00:00
}
2020-11-23 13:19:50 +00:00
return upgraderules . LicenseCommunity
}
specLicense := asLicense ( currentImage )
memberLicense := asLicense ( memberImage )
if err := upgraderules . CheckUpgradeRulesWithLicense ( memberVersion , specVersion , memberLicense , specLicense ) ; err != nil {
// E.g. 3.x -> 4.x, we cannot allow automatically
return upgradeDecision {
FromVersion : memberVersion ,
FromLicense : memberLicense ,
ToVersion : specVersion ,
ToLicense : specLicense ,
UpgradeNeeded : true ,
UpgradeAllowed : false ,
2020-01-16 12:36:28 +00:00
}
2020-11-23 13:19:50 +00:00
}
if specVersion . Major ( ) != memberVersion . Major ( ) || specVersion . Minor ( ) != memberVersion . Minor ( ) {
// Is allowed, with `--database.auto-upgrade`
2022-06-14 07:26:07 +00:00
r . planLogger . Str ( "spec-version" , string ( specVersion ) ) . Str ( "pod-version" , string ( memberVersion ) ) .
2020-11-23 13:19:50 +00:00
Int ( "spec-version.major" , specVersion . Major ( ) ) . Int ( "spec-version.minor" , specVersion . Minor ( ) ) .
Int ( "pod-version.major" , memberVersion . Major ( ) ) . Int ( "pod-version.minor" , memberVersion . Minor ( ) ) .
2022-06-14 07:26:07 +00:00
Info ( "Deciding to do a upgrade with --auto-upgrade" )
2020-01-16 12:36:28 +00:00
return upgradeDecision {
2020-11-23 13:19:50 +00:00
FromVersion : memberVersion ,
FromLicense : memberLicense ,
2020-01-16 12:36:28 +00:00
ToVersion : specVersion ,
ToLicense : specLicense ,
UpgradeNeeded : true ,
UpgradeAllowed : true ,
2020-11-23 13:19:50 +00:00
AutoUpgradeNeeded : true ,
2020-01-16 12:36:28 +00:00
}
}
2020-11-23 13:19:50 +00:00
// Patch version change, rotate only
return upgradeDecision {
FromVersion : memberVersion ,
FromLicense : memberLicense ,
ToVersion : specVersion ,
ToLicense : specLicense ,
UpgradeNeeded : true ,
UpgradeAllowed : true ,
2020-12-15 11:41:14 +00:00
AutoUpgradeNeeded : true ,
2020-11-23 13:19:50 +00:00
}
}
// currentImageInfo looks up the info of the image requested by the spec. The
// spec image is first matched against the known image names and then against
// the known image IDs. The boolean result reports if a match was found.
func currentImageInfo(spec api.DeploymentSpec, images api.ImageInfoList) (api.ImageInfo, bool) {
	wanted := spec.GetImage()

	info, found := images.GetByImage(wanted)
	if !found {
		info, found = images.GetByImageID(wanted)
	}
	if !found {
		return api.ImageInfo{}, false
	}

	return info, true
}
func memberImageInfo ( spec api . DeploymentSpec , status api . MemberStatus , images api . ImageInfoList ) ( api . ImageInfo , bool ) {
if status . Image != nil {
return * status . Image , true
}
if i , ok := images . GetByImage ( spec . GetImage ( ) ) ; ok {
return i , true
}
if i , ok := images . GetByImageID ( spec . GetImage ( ) ) ; ok {
return i , true
}
return api . ImageInfo { } , false
2020-01-16 12:36:28 +00:00
}
2022-06-14 07:26:07 +00:00
func ( r * Reconciler ) getPodDetails ( ctx context . Context , apiObject k8sutil . APIObject , spec api . DeploymentSpec ,
2020-03-17 08:31:52 +00:00
group api . ServerGroup , status api . DeploymentStatus , m api . MemberStatus ,
2022-07-24 18:40:09 +00:00
planCtx PlanBuilderContext ) ( string , * api . ArangoMember , bool ) {
2021-08-26 07:59:16 +00:00
imageInfo , imageFound := planCtx . SelectImageForMember ( spec , status , m )
2020-03-17 08:31:52 +00:00
if ! imageFound {
// Image is not found, so rotation is not needed
2022-07-24 18:40:09 +00:00
return "" , nil , false
2020-03-17 08:31:52 +00:00
}
2022-05-15 16:11:41 +00:00
member , ok := planCtx . ACS ( ) . CurrentClusterCache ( ) . ArangoMember ( ) . V1 ( ) . GetSimple ( m . ArangoMemberName ( apiObject . GetName ( ) , group ) )
2021-08-26 07:59:16 +00:00
if ! ok {
2022-07-24 18:40:09 +00:00
return "" , nil , false
2020-09-17 13:05:28 +00:00
}
2020-10-28 22:46:01 +00:00
groupSpec := spec . GetServerGroupSpec ( group )
2022-05-15 16:11:41 +00:00
renderedPod , err := planCtx . RenderPodForMember ( ctx , planCtx . ACS ( ) , spec , status , m . ID , imageInfo )
2020-03-17 08:31:52 +00:00
if err != nil {
2022-06-14 07:26:07 +00:00
r . planLogger . Err ( err ) . Error ( "Error while rendering pod" )
2022-07-24 18:40:09 +00:00
return "" , nil , false
2020-03-17 08:31:52 +00:00
}
2020-10-28 22:46:01 +00:00
checksum , err := resources . ChecksumArangoPod ( groupSpec , renderedPod )
2020-03-17 08:31:52 +00:00
if err != nil {
2022-06-14 07:26:07 +00:00
r . planLogger . Err ( err ) . Error ( "Error while getting pod checksum" )
2022-07-24 18:40:09 +00:00
return "" , nil , false
2021-08-26 07:59:16 +00:00
}
2022-07-24 18:40:09 +00:00
return checksum , member , true
2021-08-26 07:59:16 +00:00
}
// arangoMemberPodTemplateNeedsUpdate returns true when the specification of the
// given pod differs from what it should be according to the
// given deployment spec.
// When true is returned, a reason for the rotation is already returned.
2022-06-14 07:26:07 +00:00
func ( r * Reconciler ) arangoMemberPodTemplateNeedsUpdate ( ctx context . Context , apiObject k8sutil . APIObject , spec api . DeploymentSpec ,
2021-08-26 07:59:16 +00:00
group api . ServerGroup , status api . DeploymentStatus , m api . MemberStatus ,
2022-05-15 16:11:41 +00:00
planCtx PlanBuilderContext ) ( string , bool ) {
2022-07-24 18:40:09 +00:00
checksum , member , valid := r . getPodDetails ( ctx , apiObject , spec , group , status , m , planCtx )
2021-08-26 07:59:16 +00:00
if valid && ! member . Spec . Template . EqualPodSpecChecksum ( checksum ) {
return "Pod Spec changed" , true
}
return "" , false
}
2022-03-18 23:49:20 +00:00
// groupReadyForRestart returns true if the cluster is ready for the next update, that is:
2022-12-22 09:49:51 +00:00
// - all shards are in sync
// - all members are ready and fine
2022-03-18 23:49:20 +00:00
func groupReadyForRestart ( context PlanBuilderContext , status api . DeploymentStatus , member api . MemberStatus , group api . ServerGroup ) ( bool , string ) {
2022-01-14 09:58:49 +00:00
if group == api . ServerGroupSingle {
2022-03-18 23:49:20 +00:00
return true , "Restart always in single mode"
2022-01-12 12:44:33 +00:00
}
if ! status . Conditions . IsTrue ( api . ConditionTypeBootstrapCompleted ) {
// Restart is allowed always when bootstrap is not yet completed
2022-03-18 23:49:20 +00:00
return true , "Bootstrap not completed, restart is allowed"
2022-01-12 12:44:33 +00:00
}
2022-01-13 12:37:36 +00:00
// If current member did not become ready even once. Kill it
if ! member . Conditions . IsTrue ( api . ConditionTypeStarted ) {
2022-03-18 23:49:20 +00:00
return true , "Member is not started"
2022-01-13 12:37:36 +00:00
}
// If current core containers are dead kill it.
if ! member . Conditions . IsTrue ( api . ConditionTypeServing ) {
2022-03-18 23:49:20 +00:00
return true , "Member is not serving"
}
if ! status . Members . MembersOfGroup ( group ) . AllMembersServing ( ) {
return false , "Not all members are serving"
2022-01-12 12:44:33 +00:00
}
switch group {
case api . ServerGroupDBServers :
2022-03-18 23:49:20 +00:00
agencyState , ok := context . GetAgencyCache ( )
if ! ok {
// Unable to get agency state, do not restart
return false , "Unable to get agency cache"
}
2023-06-08 16:26:37 +00:00
blockingRestartShards := state . GetDBServerBlockingRestartShards ( agencyState , state . Server ( member . ID ) )
2022-03-18 23:49:20 +00:00
if s := len ( blockingRestartShards ) ; s > 0 {
return false , fmt . Sprintf ( "There are %d shards which are blocking restart" , s )
}
2022-06-27 08:00:01 +00:00
case api . ServerGroupAgents :
agencyHealth , ok := context . GetAgencyHealth ( )
if ! ok {
// Unable to get agency state, do not restart
return false , "Unable to get agency cache"
}
if err := agencyHealth . Healthy ( ) ; err != nil {
return false , fmt . Sprintf ( "Restart of agent is not allowed due to: %s" , err . Error ( ) )
}
2022-01-12 12:44:33 +00:00
}
2022-03-18 23:49:20 +00:00
return true , "Restart allowed"
2020-01-16 12:36:28 +00:00
}
// createUpgradeMemberPlan creates a plan to upgrade (stop-recreateWithAutoUpgrade-stop-start) an existing
// member.
2022-06-14 07:26:07 +00:00
func ( r * Reconciler ) createUpgradeMemberPlan ( member api . MemberStatus ,
2023-07-20 19:15:51 +00:00
group api . ServerGroup , reason string , spec api . DeploymentSpec , status api . DeploymentStatus , rotateStatefull bool , agencyCache state . State ) api . Plan {
2020-01-16 12:36:28 +00:00
upgradeAction := api . ActionTypeUpgradeMember
if rotateStatefull || group . IsStateless ( ) {
upgradeAction = api . ActionTypeRotateMember
}
2022-06-14 07:26:07 +00:00
r . planLogger .
2020-01-16 12:36:28 +00:00
Str ( "id" , member . ID ) .
Str ( "role" , group . AsRole ( ) ) .
Str ( "reason" , reason ) .
Str ( "action" , string ( upgradeAction ) ) .
2022-06-14 07:26:07 +00:00
Info ( "Creating upgrade plan" )
2022-02-16 00:36:45 +00:00
2023-07-20 19:15:51 +00:00
plan := createRotateMemberPlanWithAction ( member , group , upgradeAction , spec , reason , util . CheckConditionalP1Nil ( agencyCache . GetRebootID , driver . ServerID ( member . ID ) ) )
2022-02-16 00:36:45 +00:00
if member . Image == nil || member . Image . Image != spec . GetImage ( ) {
2022-02-16 13:29:24 +00:00
plan = plan . Before ( actions . NewAction ( api . ActionTypeSetMemberCurrentImage , group , member , reason ) . SetImage ( spec . GetImage ( ) ) )
2021-01-19 14:39:23 +00:00
}
2020-11-23 13:19:50 +00:00
if status . CurrentImage == nil || status . CurrentImage . Image != spec . GetImage ( ) {
2022-02-16 13:29:24 +00:00
plan = plan . Before ( actions . NewClusterAction ( api . ActionTypeSetCurrentImage , reason ) . SetImage ( spec . GetImage ( ) ) )
2020-09-17 13:05:28 +00:00
}
2021-07-15 12:07:33 +00:00
2022-08-04 11:06:23 +00:00
return plan
2021-07-15 12:07:33 +00:00
}
func withSecureWrap ( member api . MemberStatus ,
2023-07-20 19:15:51 +00:00
group api . ServerGroup , spec api . DeploymentSpec , rebootID * int , plan ... api . Action ) api . Plan {
2021-07-15 12:07:33 +00:00
image := member . Image
if image == nil {
return plan
2020-01-16 12:36:28 +00:00
}
2021-07-15 12:07:33 +00:00
if skipResignLeadership ( spec . GetMode ( ) , image . ArangoDBVersion ) {
// In this case we skip resign leadership but we enable maintenance
return withMaintenanceStart ( plan ... )
} else {
2023-07-20 19:15:51 +00:00
return withResignLeadership ( group , member , "ResignLeadership" , plan , rebootID )
2021-07-15 12:07:33 +00:00
}
}
func skipResignLeadership ( mode api . DeploymentMode , v driver . Version ) bool {
return mode == api . DeploymentModeCluster && features . Maintenance ( ) . Enabled ( ) && ( ( v . CompareTo ( "3.6.0" ) >= 0 && v . CompareTo ( "3.6.14" ) <= 0 ) ||
( v . CompareTo ( "3.7.0" ) >= 0 && v . CompareTo ( "3.7.12" ) <= 0 ) )
2020-01-16 12:36:28 +00:00
}
2022-12-08 17:53:40 +00:00
// withWaitForMember appends the wait actions of the given member (see
// waitForMemberActions) to the plan and returns the result.
func withWaitForMember(plan api.Plan, group api.ServerGroup, member api.MemberStatus) api.Plan {
	waits := waitForMemberActions(group, member)

	return append(plan, waits...)
}
// waitForMemberActions returns the actions which wait, in this order, for the
// given member to be up, to be ready and to be in sync.
func waitForMemberActions(group api.ServerGroup, member api.MemberStatus) api.Plan {
	waitUp := actions.NewAction(api.ActionTypeWaitForMemberUp, group, member, "Wait for member to be up after creation")
	waitReady := actions.NewAction(api.ActionTypeWaitForMemberReady, group, member, "Wait for member pod to be ready after creation")
	waitInSync := actions.NewAction(api.ActionTypeWaitForMemberInSync, group, member, "Wait for member to be in sync after creation")

	return api.Plan{waitUp, waitReady, waitInSync}
}