2020-01-16 12:36:28 +00:00
//
// DISCLAIMER
//
2022-01-10 11:35:49 +00:00
// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
2020-01-16 12:36:28 +00:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
package reconcile
import (
2020-06-08 11:30:32 +00:00
"context"
2022-01-12 12:44:33 +00:00
"github.com/arangodb/go-driver"
2021-09-06 21:49:24 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/rotation"
2021-07-15 12:07:33 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
2020-10-28 22:46:01 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
2020-01-16 12:36:28 +00:00
upgraderules "github.com/arangodb/go-upgrade-rules"
2020-04-08 10:32:24 +00:00
"github.com/arangodb/kube-arangodb/pkg/apis/deployment"
2020-01-16 12:36:28 +00:00
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
2020-05-18 13:27:53 +00:00
"github.com/arangodb/kube-arangodb/pkg/util"
2020-01-16 12:36:28 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
2021-03-10 13:30:47 +00:00
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
2020-01-16 12:36:28 +00:00
"github.com/rs/zerolog"
2020-02-21 11:59:19 +00:00
core "k8s.io/api/core/v1"
2020-01-16 12:36:28 +00:00
)
2020-07-30 13:28:30 +00:00
var (
// rotationByAnnotationOrder - Change order of execution - Coordinators and Agents should be executed before DBServer to save time
rotationByAnnotationOrder = [ ] api . ServerGroup {
api . ServerGroupAgents ,
2020-08-19 15:12:12 +00:00
api . ServerGroupSingle ,
2020-07-30 13:28:30 +00:00
api . ServerGroupCoordinators ,
api . ServerGroupDBServers ,
api . ServerGroupSyncMasters ,
api . ServerGroupSyncWorkers ,
}
)
2020-06-08 11:30:32 +00:00
2021-08-20 13:02:36 +00:00
// upgradeDecision is the result of an upgrade check performed by podNeedsUpgrading.
type upgradeDecision struct {
	// FromVersion is the ArangoDB version of the image the member uses now.
	FromVersion driver.Version
	// FromLicense is the license (community/enterprise) of the image the member uses now.
	FromLicense upgraderules.License
	// ToVersion is the ArangoDB version of the image requested by the spec.
	ToVersion driver.Version
	// ToLicense is the license (community/enterprise) of the image requested by the spec.
	ToLicense upgraderules.License
	// UpgradeNeeded is set when the image version has changed.
	UpgradeNeeded bool
	// UpgradeAllowed is set when the change is an allowed version change.
	UpgradeAllowed bool
	// AutoUpgradeNeeded is set when the database must be started with
	// `--database.auto-upgrade` once.
	AutoUpgradeNeeded bool
	// Hold is set when the image requested by the spec is not known yet;
	// no upgrade plan is created while it is set.
	Hold bool
}
2020-07-30 13:28:30 +00:00
// createRotateOrUpgradePlan goes over all pods to check if an upgrade or rotate is needed.
2020-06-08 11:30:32 +00:00
func createRotateOrUpgradePlan ( ctx context . Context ,
log zerolog . Logger , apiObject k8sutil . APIObject ,
spec api . DeploymentSpec , status api . DeploymentStatus ,
2021-03-10 13:30:47 +00:00
cachedStatus inspectorInterface . Inspector , context PlanBuilderContext ) api . Plan {
2020-06-08 11:30:32 +00:00
var plan api . Plan
2021-08-26 07:59:16 +00:00
newPlan , idle := createRotateOrUpgradePlanInternal ( log , apiObject , spec , status , cachedStatus , context )
2020-06-08 11:30:32 +00:00
if idle {
plan = append ( plan ,
api . NewAction ( api . ActionTypeIdle , api . ServerGroupUnknown , "" ) )
} else {
plan = append ( plan , newPlan ... )
}
return plan
}
2022-01-12 12:44:33 +00:00
func createMarkToRemovePlan ( ctx context . Context ,
log zerolog . Logger , apiObject k8sutil . APIObject ,
spec api . DeploymentSpec , status api . DeploymentStatus ,
cachedStatus inspectorInterface . Inspector , context PlanBuilderContext ) api . Plan {
var plan api . Plan
2020-07-30 13:28:30 +00:00
status . Members . ForeachServerInGroups ( func ( group api . ServerGroup , members api . MemberStatusList ) error {
for _ , m := range members {
if m . Phase != api . MemberPhaseCreated || m . PodName == "" {
// Only rotate when phase is created
continue
}
pod , found := cachedStatus . Pod ( m . PodName )
if ! found {
continue
}
2020-03-11 07:57:03 +00:00
if pod . Annotations != nil {
2021-10-04 13:43:47 +00:00
if _ , ok := pod . Annotations [ deployment . ArangoDeploymentPodReplaceAnnotation ] ; ok && ( group == api . ServerGroupDBServers || group == api . ServerGroupAgents || group == api . ServerGroupCoordinators ) {
2021-09-20 15:49:29 +00:00
if ! m . Conditions . IsTrue ( api . ConditionTypeMarkedToRemove ) {
2022-01-12 12:44:33 +00:00
plan = append ( plan , api . NewAction ( api . ActionTypeMarkToRemoveMember , group , m . ID , "Replace flag present" ) )
2021-09-20 15:49:29 +00:00
continue
}
2020-10-29 12:52:13 +00:00
}
2020-03-11 07:57:03 +00:00
}
2020-01-16 12:36:28 +00:00
}
2020-07-30 13:28:30 +00:00
2020-01-16 12:36:28 +00:00
return nil
2020-07-30 13:28:30 +00:00
} , rotationByAnnotationOrder ... )
2020-01-16 12:36:28 +00:00
2022-01-12 12:44:33 +00:00
return plan
}
func createRotateOrUpgradePlanInternal ( log zerolog . Logger , apiObject k8sutil . APIObject , spec api . DeploymentSpec , status api . DeploymentStatus , cachedStatus inspectorInterface . Inspector , context PlanBuilderContext ) ( api . Plan , bool ) {
member , group , decision , update := createRotateOrUpgradeDecision ( log , spec , status , context )
if ! update {
// Nothing to do
return nil , false
}
if decision != nil {
// Upgrade phase
if decision . Hold {
// Holding upgrade
return nil , false
}
if ! decision . UpgradeNeeded {
// In upgrade scenario but upgrade is not needed
return nil , false
}
if ! decision . UpgradeAllowed {
context . CreateEvent ( k8sutil . NewUpgradeNotAllowedEvent ( apiObject , decision . FromVersion , decision . ToVersion , decision . FromLicense , decision . ToLicense ) )
return nil , false
}
if groupReadyForRestart ( context , spec , status , member , group ) {
return createUpgradeMemberPlan ( log , member , group , "Version upgrade" , spec , status , ! decision . AutoUpgradeNeeded ) , false
} else if util . BoolOrDefault ( spec . AllowUnsafeUpgrade , false ) {
log . Info ( ) . Msg ( "Pod needs upgrade but cluster is not ready. Either some shards are not in sync or some member is not ready, but unsafe upgrade is allowed" )
return createUpgradeMemberPlan ( log , member , group , "Version upgrade" , spec , status , ! decision . AutoUpgradeNeeded ) , false
2020-01-16 12:36:28 +00:00
} else {
2022-01-12 12:44:33 +00:00
log . Info ( ) . Msg ( "Pod needs upgrade but cluster is not ready. Either some shards are not in sync or some member is not ready." )
return nil , true
2020-01-16 12:36:28 +00:00
}
}
2021-07-15 12:07:33 +00:00
2022-01-12 12:44:33 +00:00
// Rotate phase
if ! rotation . CheckPossible ( member ) {
return nil , false
}
if member . Conditions . IsTrue ( api . ConditionTypeRestart ) {
return createRotateMemberPlan ( log , member , group , "Restart flag present" ) , false
}
if member . Conditions . IsTrue ( api . ConditionTypePendingUpdate ) {
arangoMember , ok := cachedStatus . ArangoMember ( member . ArangoMemberName ( apiObject . GetName ( ) , group ) )
if ! ok {
return nil , false
}
p , ok := cachedStatus . Pod ( member . PodName )
if ! ok {
p = nil
}
if mode , p , reason , err := rotation . IsRotationRequired ( log , cachedStatus , spec , member , group , p , arangoMember . Spec . Template , arangoMember . Status . Template ) ; err != nil {
log . Err ( err ) . Msgf ( "Error while generating update plan" )
return nil , false
} else if mode != rotation . InPlaceRotation {
return api . Plan { api . NewAction ( api . ActionTypeSetMemberCondition , group , member . ID , "Cleaning update" ) .
AddParam ( api . ConditionTypePendingUpdate . String ( ) , "" ) .
AddParam ( api . ConditionTypeUpdating . String ( ) , "T" ) } , false
} else {
p = p . After (
api . NewAction ( api . ActionTypeWaitForMemberUp , group , member . ID ) ,
api . NewAction ( api . ActionTypeWaitForMemberInSync , group , member . ID ) )
p = p . Wrap ( api . NewAction ( api . ActionTypeSetMemberCondition , group , member . ID , reason ) .
AddParam ( api . ConditionTypePendingUpdate . String ( ) , "" ) . AddParam ( api . ConditionTypeUpdating . String ( ) , "T" ) ,
api . NewAction ( api . ActionTypeSetMemberCondition , group , member . ID , reason ) .
AddParam ( api . ConditionTypeUpdating . String ( ) , "" ) )
return p , false
}
}
2020-04-16 05:57:48 +00:00
return nil , false
2020-01-16 12:36:28 +00:00
}
2022-01-12 12:44:33 +00:00
// createRotateOrUpgradeDecision selects the next member that has to be upgraded or rotated.
//
// Upgrades take precedence: the first created member with a pod for which
// podNeedsUpgrading reports UpgradeNeeded or Hold is returned together with
// that decision. Otherwise the first member which can be rotated, whose group
// is ready for a restart and which either has the restart condition set or has
// a pending update that is neither in progress nor failed, is returned with a
// nil decision. The last result is false when no member was selected.
func createRotateOrUpgradeDecision(log zerolog.Logger, spec api.DeploymentSpec, status api.DeploymentStatus, context PlanBuilderContext) (api.MemberStatus, api.ServerGroup, *upgradeDecision, bool) {
	// Upgrade phase
	for _, e := range status.Members.AsList() {
		if e.Member.PodName == "" || e.Member.Phase != api.MemberPhaseCreated {
			// Only rotate when phase is created
			continue
		}

		// Got pod, compare it with what it should be
		d := podNeedsUpgrading(log, e.Member, spec, status.Images)
		if d.Hold || d.UpgradeNeeded {
			return e.Member, e.Group, &d, true
		}
	}

	// Update phase
	for _, e := range status.Members.AsList() {
		if !groupReadyForRestart(context, spec, status, e.Member, e.Group) || !rotation.CheckPossible(e.Member) {
			continue
		}

		conditions := e.Member.Conditions
		switch {
		case conditions.IsTrue(api.ConditionTypeRestart):
			return e.Member, e.Group, nil, true
		case conditions.IsTrue(api.ConditionTypePendingUpdate):
			// Skip members whose update is already running or has failed before
			if !conditions.IsTrue(api.ConditionTypeUpdating) && !conditions.IsTrue(api.ConditionTypeUpdateFailed) {
				return e.Member, e.Group, nil, true
			}
		}
	}

	return api.MemberStatus{}, api.ServerGroupUnknown, nil, false
}
2020-01-16 12:36:28 +00:00
// podNeedsUpgrading decides if an upgrade of the pod is needed (to comply with
// the given spec) and if that is allowed.
2020-11-23 13:19:50 +00:00
func podNeedsUpgrading ( log zerolog . Logger , status api . MemberStatus , spec api . DeploymentSpec , images api . ImageInfoList ) upgradeDecision {
currentImage , found := currentImageInfo ( spec , images )
if ! found {
// Hold rotation tasks - we do not know image
return upgradeDecision { Hold : true }
}
memberImage , found := memberImageInfo ( spec , status , images )
if ! found {
// Member info not found
return upgradeDecision { UpgradeNeeded : false }
}
if currentImage . Image == memberImage . Image {
// No change
return upgradeDecision { UpgradeNeeded : false }
}
// Image changed, check if change is allowed
specVersion := currentImage . ArangoDBVersion
memberVersion := memberImage . ArangoDBVersion
asLicense := func ( info api . ImageInfo ) upgraderules . License {
if info . Enterprise {
return upgraderules . LicenseEnterprise
2020-01-16 12:36:28 +00:00
}
2020-11-23 13:19:50 +00:00
return upgraderules . LicenseCommunity
}
specLicense := asLicense ( currentImage )
memberLicense := asLicense ( memberImage )
if err := upgraderules . CheckUpgradeRulesWithLicense ( memberVersion , specVersion , memberLicense , specLicense ) ; err != nil {
// E.g. 3.x -> 4.x, we cannot allow automatically
return upgradeDecision {
FromVersion : memberVersion ,
FromLicense : memberLicense ,
ToVersion : specVersion ,
ToLicense : specLicense ,
UpgradeNeeded : true ,
UpgradeAllowed : false ,
2020-01-16 12:36:28 +00:00
}
2020-11-23 13:19:50 +00:00
}
if specVersion . Major ( ) != memberVersion . Major ( ) || specVersion . Minor ( ) != memberVersion . Minor ( ) {
// Is allowed, with `--database.auto-upgrade`
log . Info ( ) . Str ( "spec-version" , string ( specVersion ) ) . Str ( "pod-version" , string ( memberVersion ) ) .
Int ( "spec-version.major" , specVersion . Major ( ) ) . Int ( "spec-version.minor" , specVersion . Minor ( ) ) .
Int ( "pod-version.major" , memberVersion . Major ( ) ) . Int ( "pod-version.minor" , memberVersion . Minor ( ) ) .
Msg ( "Deciding to do a upgrade with --auto-upgrade" )
2020-01-16 12:36:28 +00:00
return upgradeDecision {
2020-11-23 13:19:50 +00:00
FromVersion : memberVersion ,
FromLicense : memberLicense ,
2020-01-16 12:36:28 +00:00
ToVersion : specVersion ,
ToLicense : specLicense ,
UpgradeNeeded : true ,
UpgradeAllowed : true ,
2020-11-23 13:19:50 +00:00
AutoUpgradeNeeded : true ,
2020-01-16 12:36:28 +00:00
}
}
2020-11-23 13:19:50 +00:00
// Patch version change, rotate only
return upgradeDecision {
FromVersion : memberVersion ,
FromLicense : memberLicense ,
ToVersion : specVersion ,
ToLicense : specLicense ,
UpgradeNeeded : true ,
UpgradeAllowed : true ,
2020-12-15 11:41:14 +00:00
AutoUpgradeNeeded : true ,
2020-11-23 13:19:50 +00:00
}
}
// currentImageInfo looks up the info of the image requested by the spec.
// The image is searched by image name first and by image ID second.
// The second result is false when the image is not in the given list.
func currentImageInfo(spec api.DeploymentSpec, images api.ImageInfoList) (api.ImageInfo, bool) {
	image := spec.GetImage()

	info, found := images.GetByImage(image)
	if !found {
		info, found = images.GetByImageID(image)
	}
	if !found {
		return api.ImageInfo{}, false
	}

	return info, true
}
func memberImageInfo ( spec api . DeploymentSpec , status api . MemberStatus , images api . ImageInfoList ) ( api . ImageInfo , bool ) {
if status . Image != nil {
return * status . Image , true
}
if i , ok := images . GetByImage ( spec . GetImage ( ) ) ; ok {
return i , true
}
if i , ok := images . GetByImageID ( spec . GetImage ( ) ) ; ok {
return i , true
}
return api . ImageInfo { } , false
2020-01-16 12:36:28 +00:00
}
2021-08-26 07:59:16 +00:00
func getPodDetails ( ctx context . Context , log zerolog . Logger , apiObject k8sutil . APIObject , spec api . DeploymentSpec ,
2020-03-17 08:31:52 +00:00
group api . ServerGroup , status api . DeploymentStatus , m api . MemberStatus ,
2021-08-26 07:59:16 +00:00
cachedStatus inspectorInterface . Inspector , planCtx PlanBuilderContext ) ( string , * core . Pod , * api . ArangoMember , bool ) {
imageInfo , imageFound := planCtx . SelectImageForMember ( spec , status , m )
2020-03-17 08:31:52 +00:00
if ! imageFound {
// Image is not found, so rotation is not needed
2021-08-26 07:59:16 +00:00
return "" , nil , nil , false
2020-03-17 08:31:52 +00:00
}
2021-08-26 07:59:16 +00:00
member , ok := cachedStatus . ArangoMember ( m . ArangoMemberName ( apiObject . GetName ( ) , group ) )
if ! ok {
return "" , nil , nil , false
2020-09-17 13:05:28 +00:00
}
2020-10-28 22:46:01 +00:00
groupSpec := spec . GetServerGroupSpec ( group )
2021-04-26 08:30:06 +00:00
renderedPod , err := planCtx . RenderPodForMember ( ctx , cachedStatus , spec , status , m . ID , imageInfo )
2020-03-17 08:31:52 +00:00
if err != nil {
log . Err ( err ) . Msg ( "Error while rendering pod" )
2021-08-26 07:59:16 +00:00
return "" , nil , nil , false
2020-03-17 08:31:52 +00:00
}
2020-10-28 22:46:01 +00:00
checksum , err := resources . ChecksumArangoPod ( groupSpec , renderedPod )
2020-03-17 08:31:52 +00:00
if err != nil {
log . Err ( err ) . Msg ( "Error while getting pod checksum" )
2021-08-26 07:59:16 +00:00
return "" , nil , nil , false
}
return checksum , renderedPod , member , true
}
// arangoMemberPodTemplateNeedsUpdate returns true when the pod spec checksum
// stored in the template of the ArangoMember differs from the checksum of the
// pod rendered for the given deployment spec.
// When true is returned, a reason for the rotation is returned as well.
// False is returned when the pod details cannot be determined.
func arangoMemberPodTemplateNeedsUpdate(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIObject, spec api.DeploymentSpec,
	group api.ServerGroup, status api.DeploymentStatus, m api.MemberStatus,
	cachedStatus inspectorInterface.Inspector, planCtx PlanBuilderContext) (string, bool) {
	checksum, _, arangoMember, valid := getPodDetails(ctx, log, apiObject, spec, group, status, m, cachedStatus, planCtx)
	if !valid || arangoMember.Spec.Template.EqualPodSpecChecksum(checksum) {
		return "", false
	}

	return "Pod Spec changed", true
}
2020-01-16 12:36:28 +00:00
// clusterReadyForUpgrade returns true if the cluster is ready for the next update, that is:
// - all shards are in sync
// - all members are ready and fine
2022-01-12 12:44:33 +00:00
func groupReadyForRestart ( context PlanBuilderContext , spec api . DeploymentSpec , status api . DeploymentStatus , member api . MemberStatus , group api . ServerGroup ) bool {
if util . BoolOrDefault ( spec . AllowUnsafeUpgrade , false ) {
return true
}
if ! status . Conditions . IsTrue ( api . ConditionTypeBootstrapCompleted ) {
// Restart is allowed always when bootstrap is not yet completed
return true
}
// If current member is not ready, kill anyway
if ! member . Conditions . IsTrue ( api . ConditionTypeReady ) {
return true
}
switch group {
case api . ServerGroupDBServers :
// TODO: Improve shard placement discovery and keep WriteConcern
return context . GetShardSyncStatus ( ) && status . Members . MembersOfGroup ( group ) . AllMembersReady ( )
default :
// In case of agents we can kill only one agent at same time
return status . Members . MembersOfGroup ( group ) . AllMembersReady ( )
}
2020-01-16 12:36:28 +00:00
}
// createUpgradeMemberPlan creates a plan to upgrade (stop-recreateWithAutoUpgrade-stop-start) an existing
// member.
func createUpgradeMemberPlan ( log zerolog . Logger , member api . MemberStatus ,
2020-11-23 13:19:50 +00:00
group api . ServerGroup , reason string , spec api . DeploymentSpec , status api . DeploymentStatus , rotateStatefull bool ) api . Plan {
2020-01-16 12:36:28 +00:00
upgradeAction := api . ActionTypeUpgradeMember
if rotateStatefull || group . IsStateless ( ) {
upgradeAction = api . ActionTypeRotateMember
}
log . Debug ( ) .
Str ( "id" , member . ID ) .
Str ( "role" , group . AsRole ( ) ) .
Str ( "reason" , reason ) .
Str ( "action" , string ( upgradeAction ) ) .
Msg ( "Creating upgrade plan" )
2021-01-19 14:39:23 +00:00
var plan = api . Plan {
api . NewAction ( api . ActionTypeCleanTLSKeyfileCertificate , group , member . ID , "Remove server keyfile and enforce renewal/recreation" ) ,
}
2020-11-23 13:19:50 +00:00
if status . CurrentImage == nil || status . CurrentImage . Image != spec . GetImage ( ) {
2021-07-15 12:07:33 +00:00
plan = plan . After ( api . NewAction ( api . ActionTypeSetCurrentImage , group , "" , reason ) . SetImage ( spec . GetImage ( ) ) )
2020-09-17 13:05:28 +00:00
}
2020-11-23 13:19:50 +00:00
if member . Image == nil || member . Image . Image != spec . GetImage ( ) {
2021-07-15 12:07:33 +00:00
plan = plan . After ( api . NewAction ( api . ActionTypeSetMemberCurrentImage , group , member . ID , reason ) . SetImage ( spec . GetImage ( ) ) )
}
plan = plan . After ( api . NewAction ( upgradeAction , group , member . ID , reason ) ,
api . NewAction ( api . ActionTypeWaitForMemberUp , group , member . ID ) )
return withSecureWrap ( member , group , spec , plan ... )
}
func withSecureWrap ( member api . MemberStatus ,
group api . ServerGroup , spec api . DeploymentSpec , plan ... api . Action ) api . Plan {
image := member . Image
if image == nil {
return plan
2020-01-16 12:36:28 +00:00
}
2021-07-15 12:07:33 +00:00
if skipResignLeadership ( spec . GetMode ( ) , image . ArangoDBVersion ) {
// In this case we skip resign leadership but we enable maintenance
return withMaintenanceStart ( plan ... )
} else {
return withResignLeadership ( group , member , "ResignLeadership" , plan ... )
}
}
func skipResignLeadership ( mode api . DeploymentMode , v driver . Version ) bool {
return mode == api . DeploymentModeCluster && features . Maintenance ( ) . Enabled ( ) && ( ( v . CompareTo ( "3.6.0" ) >= 0 && v . CompareTo ( "3.6.14" ) <= 0 ) ||
( v . CompareTo ( "3.7.0" ) >= 0 && v . CompareTo ( "3.7.12" ) <= 0 ) )
2020-01-16 12:36:28 +00:00
}