//
// DISCLAIMER
//
// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
package deployment
import (
"context"
"time"
2023-09-02 00:17:01 +00:00
"github.com/rs/zerolog/log"
2020-03-11 07:57:03 +00:00
"github.com/arangodb/kube-arangodb/pkg/apis/deployment"
2019-11-04 07:49:24 +00:00
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
2022-07-11 11:49:47 +00:00
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
"github.com/arangodb/kube-arangodb/pkg/deployment/patch"
2018-08-31 14:08:21 +00:00
"github.com/arangodb/kube-arangodb/pkg/metrics"
2022-03-24 11:40:51 +00:00
"github.com/arangodb/kube-arangodb/pkg/upgrade"
2018-08-25 10:08:44 +00:00
"github.com/arangodb/kube-arangodb/pkg/util"
2022-07-11 11:49:47 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/arangodb/kube-arangodb/pkg/util/globals"
2018-03-15 15:33:28 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
2022-11-02 07:29:46 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/kerrors"
2018-03-15 15:33:28 +00:00
)
2018-08-31 14:08:21 +00:00
// inspectDeploymentDurationGauges is the gauge vector, labelled by deployment
// name, that records how long a single deployment inspection took (in sec).
var inspectDeploymentDurationGauges = metrics.MustRegisterGaugeVec(
	metricsComponent,
	"inspect_deployment_duration",
	"Amount of time taken by a single inspection of a deployment (in sec)",
	metrics.DeploymentName,
)
2018-03-15 15:33:28 +00:00
// inspectDeployment inspects the entire deployment, creates
// a plan to update if needed and inspects underlying resources.
// This function should be called when:
// - the deployment has changed
// - any of the underlying resources has changed
// - once in a while
// Returns the delay until this function should be called again.
2018-08-30 14:57:08 +00:00
func ( d * Deployment ) inspectDeployment ( lastInterval util . Interval ) util . Interval {
2018-08-31 14:08:21 +00:00
start := time . Now ( )
2021-04-26 08:30:06 +00:00
2023-09-02 00:17:01 +00:00
if delay := d . delayer . Wait ( ) ; delay > 0 {
log . Info ( ) . Dur ( "delay" , delay ) . Msgf ( "Reconciliation loop execution was delayed" )
}
defer d . delayer . Delay ( d . config . ReconciliationDelay )
2021-12-10 12:04:48 +00:00
ctxReconciliation , cancelReconciliation := globals . GetGlobalTimeouts ( ) . Reconciliation ( ) . WithTimeout ( context . Background ( ) )
2021-04-26 08:30:06 +00:00
defer cancelReconciliation ( )
2020-06-08 11:30:32 +00:00
defer func ( ) {
2022-06-29 06:29:57 +00:00
d . log . Trace ( "Inspect loop took %s" , time . Since ( start ) )
2020-06-08 11:30:32 +00:00
} ( )
2018-03-15 15:33:28 +00:00
nextInterval := lastInterval
hasError := false
2021-04-26 08:30:06 +00:00
2021-07-01 15:27:54 +00:00
deploymentName := d . GetName ( )
2018-08-31 14:08:21 +00:00
defer metrics . SetDuration ( inspectDeploymentDurationGauges . WithLabelValues ( deploymentName ) , start )
2018-03-15 15:33:28 +00:00
2022-05-31 11:31:00 +00:00
err := d . acs . CurrentClusterCache ( ) . Refresh ( ctxReconciliation )
2020-06-08 11:30:32 +00:00
if err != nil {
2022-06-14 07:26:07 +00:00
d . log . Err ( err ) . Error ( "Unable to get resources" )
2020-06-08 11:30:32 +00:00
return minInspectionInterval // Retry ASAP
}
2018-04-03 15:43:42 +00:00
// Check deployment still exists
2022-05-31 11:31:00 +00:00
updated , err := d . acs . CurrentClusterCache ( ) . GetCurrentArangoDeployment ( )
2022-11-02 07:29:46 +00:00
if kerrors . IsNotFound ( err ) {
2018-04-03 15:43:42 +00:00
// Deployment is gone
2022-06-14 07:26:07 +00:00
d . log . Info ( "Deployment is gone" )
2022-07-18 21:49:57 +00:00
d . Stop ( )
2018-04-03 15:43:42 +00:00
return nextInterval
2022-10-04 14:17:19 +00:00
} else if err != nil {
d . log . Err ( err ) . Error ( "Deployment fetch error" )
return nextInterval
} else if d . uid != updated . GetUID ( ) {
d . log . Error ( "Deployment UID Changed!" )
return nextInterval
2018-06-07 14:22:02 +00:00
} else if updated != nil && updated . GetDeletionTimestamp ( ) != nil {
// Deployment is marked for deletion
2022-03-23 22:19:36 +00:00
if err := d . runDeploymentFinalizers ( ctxReconciliation , d . GetCachedStatus ( ) ) ; err != nil {
2018-06-07 14:22:02 +00:00
hasError = true
2022-08-25 11:44:28 +00:00
d . CreateEvent ( k8sutil . NewErrorEvent ( "ArangoDeployment finalizer inspection failed" , err , d . currentObject ) )
2018-06-07 14:22:02 +00:00
}
} else {
2020-01-27 06:33:12 +00:00
// Check if maintenance annotation is set
if updated != nil && updated . Annotations != nil {
2020-03-11 07:57:03 +00:00
if v , ok := updated . Annotations [ deployment . ArangoDeploymentPodMaintenanceAnnotation ] ; ok && v == "true" {
2020-01-27 06:33:12 +00:00
// Disable checks if we will enter maintenance mode
2022-06-14 07:26:07 +00:00
d . log . Str ( "deployment" , deploymentName ) . Info ( "Deployment in maintenance mode" )
2020-01-27 06:33:12 +00:00
return nextInterval
}
}
2022-08-25 11:44:28 +00:00
if ensureFinalizers ( updated ) {
if err := d . ApplyPatch ( ctxReconciliation , patch . ItemReplace ( patch . NewPath ( "metadata" , "finalizers" ) , updated . Finalizers ) ) ; err != nil {
d . log . Err ( err ) . Debug ( "Unable to set finalizers" )
}
}
if canProceed , changed , err := d . acceptNewSpec ( ctxReconciliation , updated ) ; err != nil {
d . log . Err ( err ) . Debug ( "Verification of deployment failed" )
if ! canProceed {
return minInspectionInterval // Retry ASAP
}
} else if changed {
d . log . Info ( "Accepted new spec" )
return minInspectionInterval // Retry ASAP
} else if ! canProceed {
d . log . Err ( err ) . Error ( "Cannot proceed with reconciliation" )
return minInspectionInterval // Retry ASAP
}
// Ensure that status is up to date
if ! d . currentObjectStatus . Equal ( updated . Status ) {
2022-09-13 10:52:28 +00:00
d . metrics . Errors . StatusRestores ++
2022-08-25 11:44:28 +00:00
if err := d . updateCRStatus ( ctxReconciliation , * d . currentObjectStatus ) ; err != nil {
d . log . Err ( err ) . Error ( "Unable to refresh status" )
return minInspectionInterval // Retry ASAP
}
}
2022-09-13 10:52:28 +00:00
// Ensure that fields are recovered
currentStatus := d . GetStatus ( )
if updated , err := RecoverStatus ( & currentStatus , RecoverPodDetails ) ; err != nil {
d . log . Err ( err ) . Error ( "Unable to recover status" )
return minInspectionInterval // Retry ASAP
} else if updated {
d . metrics . Errors . StatusRestores ++
if err := d . updateCRStatus ( ctxReconciliation , currentStatus ) ; err != nil {
d . log . Err ( err ) . Error ( "Unable to refresh status" )
return minInspectionInterval // Retry ASAP
}
}
2022-08-25 11:44:28 +00:00
d . currentObject = updated
d . metrics . Deployment . Accepted = updated . Status . Conditions . IsTrue ( api . ConditionTypeSpecAccepted )
2022-12-19 11:53:42 +00:00
d . metrics . Deployment . Propagated = updated . Status . Conditions . IsTrue ( api . ConditionTypeSpecPropagated )
2022-08-25 11:44:28 +00:00
d . metrics . Deployment . UpToDate = updated . Status . Conditions . IsTrue ( api . ConditionTypeUpToDate )
2018-06-07 14:22:02 +00:00
// Is the deployment in failed state, if so, give up.
2018-06-12 09:09:42 +00:00
if d . GetPhase ( ) == api . DeploymentPhaseFailed {
2022-06-14 07:26:07 +00:00
d . log . Debug ( "Deployment is in Failed state." )
2018-06-07 14:22:02 +00:00
return nextInterval
}
2018-03-27 10:11:57 +00:00
2022-03-18 23:49:20 +00:00
d . GetMembersState ( ) . RefreshState ( ctxReconciliation , updated . Status . Members . AsList ( ) )
2022-06-14 07:26:07 +00:00
d . GetMembersState ( ) . Log ( d . log )
2022-03-24 11:40:51 +00:00
if err := d . WithStatusUpdateErr ( ctxReconciliation , func ( s * api . DeploymentStatus ) ( bool , error ) {
if changed , err := upgrade . RunUpgrade ( * updated , s , d . GetCachedStatus ( ) ) ; err != nil {
return false , err
} else {
return changed , nil
}
} ) ; err != nil {
2022-08-25 11:44:28 +00:00
d . CreateEvent ( k8sutil . NewErrorEvent ( "Upgrade failed" , err , d . currentObject ) )
2022-03-24 11:40:51 +00:00
nextInterval = minInspectionInterval
d . recentInspectionErrors ++
return nextInterval . ReduceTo ( maxInspectionInterval )
}
2022-02-03 23:03:12 +00:00
2022-03-23 22:19:36 +00:00
inspectNextInterval , err := d . inspectDeploymentWithError ( ctxReconciliation , nextInterval )
2021-04-26 08:30:06 +00:00
if err != nil {
2022-07-11 11:49:47 +00:00
if ! errors . IsReconcile ( err ) {
2020-06-08 11:30:32 +00:00
nextInterval = inspectNextInterval
hasError = true
2018-03-27 10:11:57 +00:00
2022-08-25 11:44:28 +00:00
d . CreateEvent ( k8sutil . NewErrorEvent ( "Reconciliation failed" , err , d . currentObject ) )
2020-06-08 11:30:32 +00:00
} else {
nextInterval = minInspectionInterval
}
2018-06-07 14:22:02 +00:00
}
2020-04-01 13:38:03 +00:00
}
2018-03-27 10:11:57 +00:00
2020-04-01 13:38:03 +00:00
// Update next interval (on errors)
if hasError {
if d . recentInspectionErrors == 0 {
2018-06-07 14:22:02 +00:00
nextInterval = minInspectionInterval
2020-04-01 13:38:03 +00:00
d . recentInspectionErrors ++
2018-06-07 14:22:02 +00:00
}
2020-04-01 13:38:03 +00:00
} else {
d . recentInspectionErrors = 0
}
return nextInterval . ReduceTo ( maxInspectionInterval )
}
2018-03-20 12:10:52 +00:00
2022-04-27 10:32:23 +00:00
// inspectDeploymentWithError ensures that the deployment is in a valid state
2022-03-23 22:19:36 +00:00
func ( d * Deployment ) inspectDeploymentWithError ( ctx context . Context , lastInterval util . Interval ) ( nextInterval util . Interval , inspectError error ) {
2020-06-08 11:30:32 +00:00
t := time . Now ( )
2021-02-10 08:17:52 +00:00
2020-06-08 11:30:32 +00:00
defer func ( ) {
2022-06-29 06:29:57 +00:00
d . log . Trace ( "Reconciliation loop took %s" , time . Since ( t ) )
2020-06-08 11:30:32 +00:00
} ( )
2020-04-01 13:38:03 +00:00
// Ensure that spec and status checksum are same
2022-08-25 11:44:28 +00:00
currentSpec := d . currentObject . Spec
status := d . GetStatus ( )
2018-03-15 15:33:28 +00:00
2020-04-01 13:38:03 +00:00
nextInterval = lastInterval
inspectError = nil
2018-03-29 09:56:57 +00:00
2022-08-25 11:44:28 +00:00
currentChecksum , err := currentSpec . Checksum ( )
2020-04-01 13:38:03 +00:00
if err != nil {
return minInspectionInterval , errors . Wrapf ( err , "Calculation of spec failed" )
} else {
2022-08-25 11:44:28 +00:00
condition , exists := status . Conditions . Get ( api . ConditionTypeSpecAccepted )
if v := status . AcceptedSpecVersion ; ( v == nil || currentChecksum != * v ) && ( ! exists || condition . IsTrue ( ) ) {
if err = d . updateConditionWithHash ( ctx , api . ConditionTypeSpecAccepted , false , "Spec Changed" , "Spec Object changed. Waiting to be accepted" , currentChecksum ) ; err != nil {
return minInspectionInterval , errors . Wrapf ( err , "Unable to update SpecAccepted condition" )
}
return minInspectionInterval , nil // Retry ASAP
} else if v != nil {
if * v == currentChecksum && ! condition . IsTrue ( ) {
if err = d . updateConditionWithHash ( ctx , api . ConditionTypeSpecAccepted , true , "Spec Accepted" , "Spec Object accepted" , currentChecksum ) ; err != nil {
return minInspectionInterval , errors . Wrapf ( err , "Unable to update SpecAccepted condition" )
}
return minInspectionInterval , nil // Retry ASAP
}
}
}
if ! status . Conditions . IsTrue ( api . ConditionTypeSpecAccepted ) {
2020-04-01 13:38:03 +00:00
condition , exists := status . Conditions . Get ( api . ConditionTypeUpToDate )
2022-08-25 11:44:28 +00:00
if ! exists || condition . IsTrue ( ) {
2022-12-19 11:53:42 +00:00
propagatedCondition , propagatedExists := status . Conditions . Get ( api . ConditionTypeSpecPropagated )
if ! propagatedExists || propagatedCondition . IsTrue ( ) {
if err = d . updateConditionWithHash ( ctx , api . ConditionTypeSpecPropagated , false , "Spec Changed" , "Spec Object changed. Waiting until spec will be applied" , "" ) ; err != nil {
return minInspectionInterval , errors . Wrapf ( err , "Unable to update SpecPropagated condition" )
}
return minInspectionInterval , nil // Retry ASAP
}
2022-08-25 11:44:28 +00:00
if err = d . updateConditionWithHash ( ctx , api . ConditionTypeUpToDate , false , "Spec Changed" , "Spec Object changed. Waiting until plan will be applied" , currentChecksum ) ; err != nil {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Unable to update UpToDate condition" )
2022-08-25 11:44:28 +00:00
2020-04-01 13:38:03 +00:00
}
2019-03-22 09:48:42 +00:00
2020-04-01 13:38:03 +00:00
return minInspectionInterval , nil // Retry ASAP
2018-09-07 11:18:39 +00:00
}
2020-04-01 13:38:03 +00:00
}
2018-09-07 11:18:39 +00:00
2022-08-25 11:44:28 +00:00
if err := d . acs . Inspect ( ctx , d . currentObject , d . deps . Client , d . GetCachedStatus ( ) ) ; err != nil {
2022-06-14 07:26:07 +00:00
d . log . Err ( err ) . Warn ( "Unable to handle ACS objects" )
2022-03-10 11:29:55 +00:00
}
2020-12-15 11:41:14 +00:00
// Cleanup terminated pods on the beginning of loop
2022-05-15 16:11:41 +00:00
if x , err := d . resources . CleanupTerminatedPods ( ctx ) ; err != nil {
2020-12-15 11:41:14 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Pod cleanup failed" )
} else {
nextInterval = nextInterval . ReduceTo ( x )
}
2022-11-24 08:04:16 +00:00
if err := d . resources . EnsureCoreResources ( ctx , d . GetCachedStatus ( ) ) ; err != nil {
d . log . Err ( err ) . Error ( "Unable to ensure core resources" )
2021-03-10 13:30:47 +00:00
}
2020-04-01 13:38:03 +00:00
// Inspect secret hashes
2022-03-23 22:19:36 +00:00
if err := d . resources . ValidateSecretHashes ( ctx , d . GetCachedStatus ( ) ) ; err != nil {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Secret hash validation failed" )
}
2018-03-15 15:33:28 +00:00
2020-04-01 13:38:03 +00:00
// Is the deployment in a good state?
if status . Conditions . IsTrue ( api . ConditionTypeSecretsChanged ) {
2021-01-08 14:35:38 +00:00
return minInspectionInterval , errors . Newf ( "Secrets changed" )
2020-04-01 13:38:03 +00:00
}
// Ensure we have image info
2022-08-25 11:44:28 +00:00
if retrySoon , exists , err := d . ensureImages ( ctx , d . currentObject , d . GetCachedStatus ( ) ) ; err != nil {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Image detection failed" )
2020-06-26 06:53:24 +00:00
} else if retrySoon || ! exists {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , nil
}
// Inspection of generated resources needed
2022-03-23 22:19:36 +00:00
if x , err := d . resources . InspectPods ( ctx , d . GetCachedStatus ( ) ) ; err != nil {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Pod inspection failed" )
} else {
nextInterval = nextInterval . ReduceTo ( x )
}
2020-06-08 11:30:32 +00:00
2022-03-23 22:19:36 +00:00
if x , err := d . resources . InspectPVCs ( ctx , d . GetCachedStatus ( ) ) ; err != nil {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , errors . Wrapf ( err , "PVC inspection failed" )
} else {
nextInterval = nextInterval . ReduceTo ( x )
}
// Check members for resilience
2021-04-26 08:30:06 +00:00
if err := d . resilience . CheckMemberFailure ( ctx ) ; err != nil {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Member failure detection failed" )
}
// Immediate actions
2021-04-26 08:30:06 +00:00
if err := d . reconciler . CheckDeployment ( ctx ) ; err != nil {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Reconciler immediate actions failed" )
}
2022-11-24 08:04:16 +00:00
if err := d . resources . EnsureResources ( ctx , d . haveServiceMonitorCRD , d . GetCachedStatus ( ) ) ; err != nil {
d . log . Err ( err ) . Error ( "Unable to ensure resources" )
2020-04-01 13:38:03 +00:00
}
2022-07-07 12:09:17 +00:00
d . metrics . Agency . Fetches ++
2021-12-06 10:31:17 +00:00
if offset , err := d . RefreshAgencyCache ( ctx ) ; err != nil {
2022-07-07 12:09:17 +00:00
d . metrics . Agency . Errors ++
2022-06-14 07:26:07 +00:00
d . log . Err ( err ) . Error ( "Unable to refresh agency" )
2021-12-06 10:31:17 +00:00
} else {
2022-07-07 12:09:17 +00:00
d . metrics . Agency . Index = offset
2021-12-06 10:31:17 +00:00
}
2021-07-15 12:07:33 +00:00
// Refresh maintenance lock
d . refreshMaintenanceTTL ( ctx )
2020-04-01 13:38:03 +00:00
// Create scale/update plan
2022-08-25 11:44:28 +00:00
if _ , ok := d . currentObject . Annotations [ deployment . ArangoDeploymentPlanCleanAnnotation ] ; ok {
2021-04-26 08:30:06 +00:00
if err := d . ApplyPatch ( ctx , patch . ItemRemove ( patch . NewPath ( "metadata" , "annotations" , deployment . ArangoDeploymentPlanCleanAnnotation ) ) ) ; err != nil {
2020-11-27 12:49:28 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Unable to create remove annotation patch" )
}
2021-04-26 08:30:06 +00:00
if err := d . WithStatusUpdate ( ctx , func ( s * api . DeploymentStatus ) bool {
2020-11-27 12:49:28 +00:00
s . Plan = nil
return true
2022-08-25 11:44:28 +00:00
} ) ; err != nil {
2020-11-27 12:49:28 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Unable clean plan" )
}
2022-06-14 07:26:07 +00:00
} else if err , updated := d . reconciler . CreatePlan ( ctx ) ; err != nil {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Plan creation failed" )
2020-04-16 05:57:48 +00:00
} else if updated {
2022-06-14 07:26:07 +00:00
d . log . Info ( "Plan generated, reconciling" )
2020-04-16 05:57:48 +00:00
return minInspectionInterval , nil
}
2022-02-22 15:55:33 +00:00
// Reachable state ensurer
reachableConditionState := status . Conditions . Check ( api . ConditionTypeReachable ) . Exists ( ) . IsTrue ( ) . Evaluate ( )
2022-03-18 23:49:20 +00:00
if d . GetMembersState ( ) . State ( ) . IsReachable ( ) {
2022-02-22 15:55:33 +00:00
if ! reachableConditionState {
if err = d . updateConditionWithHash ( ctx , api . ConditionTypeReachable , true , "ArangoDB is reachable" , "" , "" ) ; err != nil {
return minInspectionInterval , errors . Wrapf ( err , "Unable to update Reachable condition" )
}
}
} else {
if reachableConditionState {
if err = d . updateConditionWithHash ( ctx , api . ConditionTypeReachable , false , "ArangoDB is not reachable" , "" , "" ) ; err != nil {
return minInspectionInterval , errors . Wrapf ( err , "Unable to update Reachable condition" )
}
}
}
2022-08-25 11:44:28 +00:00
if v := status . AcceptedSpecVersion ; v != nil && d . currentObject . Status . IsPlanEmpty ( ) && status . AppliedVersion != * v {
2021-04-26 08:30:06 +00:00
if err := d . WithStatusUpdate ( ctx , func ( s * api . DeploymentStatus ) bool {
2022-08-25 11:44:28 +00:00
s . AppliedVersion = * v
2020-04-16 05:57:48 +00:00
return true
} ) ; err != nil {
return minInspectionInterval , errors . Wrapf ( err , "Unable to update UpToDate condition" )
}
return minInspectionInterval , nil
2022-08-25 11:44:28 +00:00
} else {
2022-02-22 15:55:33 +00:00
isUpToDate , reason := d . isUpToDateStatus ( status )
2021-08-30 09:07:52 +00:00
if ! isUpToDate && status . Conditions . IsTrue ( api . ConditionTypeUpToDate ) {
2022-08-25 11:44:28 +00:00
if err = d . updateConditionWithHash ( ctx , api . ConditionTypeUpToDate , false , reason , "There are pending operations in plan or members are in restart process" , * v ) ; err != nil {
2020-04-16 05:57:48 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Unable to update UpToDate condition" )
}
return minInspectionInterval , nil
}
2021-08-30 09:07:52 +00:00
if isUpToDate && ! status . Conditions . IsTrue ( api . ConditionTypeUpToDate ) {
2022-08-29 19:51:23 +00:00
d . sendCIUpdate ( )
2022-08-25 11:44:28 +00:00
if err = d . updateConditionWithHash ( ctx , api . ConditionTypeUpToDate , true , "Spec is Up To Date" , "Spec is Up To Date" , * v ) ; err != nil {
2020-04-16 05:57:48 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Unable to update UpToDate condition" )
}
return minInspectionInterval , nil
}
2020-04-01 13:38:03 +00:00
}
2022-12-19 11:53:42 +00:00
if status . Conditions . IsTrue ( api . ConditionTypeUpToDate ) && ! status . Conditions . IsTrue ( api . ConditionTypeSpecPropagated ) {
if err = d . updateConditionWithHash ( ctx , api . ConditionTypeSpecPropagated , true , "Spec is Propagated" , "Spec is Propagated" , "" ) ; err != nil {
return minInspectionInterval , errors . Wrapf ( err , "Unable to update SpecPropagated condition" )
}
}
2020-04-01 13:38:03 +00:00
// Execute current step of scale/update plan
2022-06-14 07:26:07 +00:00
retrySoon , err := d . reconciler . ExecutePlan ( ctx )
2020-04-01 13:38:03 +00:00
if err != nil {
return minInspectionInterval , errors . Wrapf ( err , "Plan execution failed" )
}
if retrySoon {
nextInterval = minInspectionInterval
}
2018-03-15 15:33:28 +00:00
2020-04-01 13:38:03 +00:00
// Create access packages
2021-04-26 08:30:06 +00:00
if err := d . createAccessPackages ( ctx ) ; err != nil {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , errors . Wrapf ( err , "AccessPackage creation failed" )
}
2019-11-11 13:11:27 +00:00
2022-04-15 13:56:05 +00:00
// Inspect deployment for synced members
2022-07-25 10:41:31 +00:00
if health , ok := d . GetMembersState ( ) . Health ( ) ; ok {
if err := d . resources . SyncMembersInCluster ( ctx , health ) ; err != nil {
return minInspectionInterval , errors . Wrapf ( err , "Removed member cleanup failed" )
}
2020-04-01 13:38:03 +00:00
}
2019-01-14 13:30:39 +00:00
2020-04-01 13:38:03 +00:00
// At the end of the inspect, we cleanup terminated pods.
2022-05-15 16:11:41 +00:00
if x , err := d . resources . CleanupTerminatedPods ( ctx ) ; err != nil {
2020-04-01 13:38:03 +00:00
return minInspectionInterval , errors . Wrapf ( err , "Pod cleanup failed" )
} else {
nextInterval = nextInterval . ReduceTo ( x )
}
2019-09-27 11:04:23 +00:00
2020-04-01 13:38:03 +00:00
return
}
2022-08-29 19:51:23 +00:00
// sendCIUpdate forwards the accepted spec from the current object status to
// the cluster scaling integration. It does nothing when the integration, the
// current status or the accepted spec is not set.
func (d *Deployment) sendCIUpdate() {
	integration := d.clusterScalingIntegration
	if integration == nil {
		return
	}

	current := d.currentObjectStatus
	if current == nil {
		return
	}

	if accepted := current.AcceptedSpec; accepted != nil {
		integration.SendUpdateToCluster(*accepted)
	}
}
2022-02-22 15:55:33 +00:00
func ( d * Deployment ) isUpToDateStatus ( status api . DeploymentStatus ) ( upToDate bool , reason string ) {
2022-11-03 13:39:46 +00:00
if status . NonInternalActions ( ) > 0 {
2021-08-30 09:07:52 +00:00
return false , "Plan is not empty"
}
upToDate = true
2022-08-25 11:44:28 +00:00
if v := status . AcceptedSpecVersion ; v == nil || status . AppliedVersion != * v {
upToDate = false
reason = "Spec is not accepted"
return
}
if ! status . Conditions . Check ( api . ConditionTypeSpecAccepted ) . Exists ( ) . IsTrue ( ) . Evaluate ( ) {
upToDate = false
reason = "Spec is not accepted"
return
}
2022-11-16 07:59:02 +00:00
if ! status . Conditions . Check ( api . ConditionTypeBootstrapCompleted ) . Exists ( ) . IsTrue ( ) . Evaluate ( ) {
reason = "ArangoDB is not bootstrapped"
upToDate = false
return
}
2022-02-22 15:55:33 +00:00
if ! status . Conditions . Check ( api . ConditionTypeReachable ) . Exists ( ) . IsTrue ( ) . Evaluate ( ) {
2022-11-16 07:59:02 +00:00
reason = "ArangoDB is not reachable"
2022-02-22 15:55:33 +00:00
upToDate = false
2022-07-24 18:26:26 +00:00
return
2022-02-22 15:55:33 +00:00
}
2022-07-24 18:26:26 +00:00
for _ , m := range status . Members . AsList ( ) {
member := m . Member
if member . Conditions . IsTrue ( api . ConditionTypeRestart ) || member . Conditions . IsTrue ( api . ConditionTypePendingRestart ) {
upToDate = false
reason = "Pending restarts on members"
return
2022-02-22 15:55:33 +00:00
}
2022-07-24 18:26:26 +00:00
if member . Conditions . IsTrue ( api . ConditionTypePVCResizePending ) {
upToDate = false
reason = "PVC is resizing"
return
2021-08-30 09:07:52 +00:00
}
2022-07-24 18:26:26 +00:00
}
2021-08-30 09:07:52 +00:00
return
}
2021-07-15 12:07:33 +00:00
func ( d * Deployment ) refreshMaintenanceTTL ( ctx context . Context ) {
2022-08-25 11:44:28 +00:00
if d . GetSpec ( ) . Mode . Get ( ) == api . DeploymentModeSingle {
2021-07-15 12:07:33 +00:00
return
}
if ! features . Maintenance ( ) . Enabled ( ) {
// Maintenance feature is not enabled
return
}
2022-04-25 11:56:04 +00:00
agencyState , agencyOK := d . GetAgencyCache ( )
if ! agencyOK {
return
}
2022-08-25 11:44:28 +00:00
status := d . GetStatus ( )
condition , ok := status . Conditions . Get ( api . ConditionTypeMaintenance )
2022-04-25 11:56:04 +00:00
maintenance := agencyState . Supervision . Maintenance
2021-07-15 12:07:33 +00:00
if ! ok || ! condition . IsTrue ( ) {
return
}
// Check GracePeriod
2022-04-25 11:56:04 +00:00
if t , ok := maintenance . Time ( ) ; ok {
2022-08-25 11:44:28 +00:00
if time . Until ( t ) < time . Hour - d . GetSpec ( ) . Timeouts . GetMaintenanceGracePeriod ( ) {
2022-04-25 11:56:04 +00:00
if err := d . SetAgencyMaintenanceMode ( ctx , true ) ; err != nil {
return
}
2022-06-14 07:26:07 +00:00
d . log . Info ( "Refreshed maintenance lock" )
2021-07-15 12:07:33 +00:00
}
2022-04-25 11:56:04 +00:00
} else {
2022-08-25 11:44:28 +00:00
if condition . LastUpdateTime . Add ( d . GetSpec ( ) . Timeouts . GetMaintenanceGracePeriod ( ) ) . Before ( time . Now ( ) ) {
2022-04-25 11:56:04 +00:00
if err := d . SetAgencyMaintenanceMode ( ctx , true ) ; err != nil {
return
}
2022-06-14 07:26:07 +00:00
d . log . Info ( "Refreshed maintenance lock" )
2021-07-15 12:07:33 +00:00
}
}
}
2018-03-23 14:36:10 +00:00
// triggerInspection fires the deployment's inspect trigger so that an
// inspection is run soon.
func (d *Deployment) triggerInspection() {
	d.inspectTrigger.Trigger()
}
2019-05-16 08:43:02 +00:00
2022-01-15 22:55:08 +00:00
func ( d * Deployment ) updateConditionWithHash ( ctx context . Context , conditionType api . ConditionType , status bool , reason , message , hash string ) error {
2022-06-14 07:26:07 +00:00
d . log . Str ( "condition" , string ( conditionType ) ) . Bool ( "status" , status ) . Str ( "reason" , reason ) . Str ( "message" , message ) . Str ( "hash" , hash ) . Info ( "Updated condition" )
2021-04-26 08:30:06 +00:00
if err := d . WithStatusUpdate ( ctx , func ( s * api . DeploymentStatus ) bool {
2022-01-15 22:55:08 +00:00
return s . Conditions . UpdateWithHash ( conditionType , status , reason , message , hash )
2020-04-16 05:57:48 +00:00
} ) ; err != nil {
return errors . Wrapf ( err , "Unable to update condition" )
}
return nil
}