1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00
kube-arangodb/pkg/deployment/deployment_inspector.go

243 lines
8.1 KiB
Go
Raw Normal View History

2018-03-15 15:33:28 +00:00
//
// DISCLAIMER
//
2020-03-04 10:25:14 +00:00
// Copyright 2020 ArangoDB GmbH, Cologne, Germany
2018-03-15 15:33:28 +00:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//
package deployment
import (
"context"
"time"
2020-03-11 07:57:03 +00:00
"github.com/arangodb/kube-arangodb/pkg/apis/deployment"
2019-11-04 07:49:24 +00:00
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
2018-08-31 14:08:21 +00:00
"github.com/arangodb/kube-arangodb/pkg/metrics"
2018-08-25 10:08:44 +00:00
"github.com/arangodb/kube-arangodb/pkg/util"
2018-03-15 15:33:28 +00:00
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2018-03-15 15:33:28 +00:00
)
2018-08-31 14:08:21 +00:00
// inspectDeploymentDurationGauges is the gauge vector, labelled with
// metrics.DeploymentName, that inspectDeployment updates (through
// metrics.SetDuration) with the duration of a single inspection run.
var inspectDeploymentDurationGauges = metrics.MustRegisterGaugeVec(
	metricsComponent,
	"inspect_deployment_duration",
	"Amount of time taken by a single inspection of a deployment (in sec)",
	metrics.DeploymentName,
)
2018-03-15 15:33:28 +00:00
// inspectDeployment inspects the entire deployment, creates
// a plan to update if needed and inspects underlying resources.
// This function should be called when:
// - the deployment has changed
// - any of the underlying resources has changed
// - once in a while
// Returns the delay until this function should be called again.
2018-08-30 14:57:08 +00:00
func (d *Deployment) inspectDeployment(lastInterval util.Interval) util.Interval {
log := d.deps.Log
2018-08-31 14:08:21 +00:00
start := time.Now()
2018-03-15 15:33:28 +00:00
nextInterval := lastInterval
hasError := false
ctx := context.Background()
2018-08-31 14:08:21 +00:00
deploymentName := d.apiObject.GetName()
defer metrics.SetDuration(inspectDeploymentDurationGauges.WithLabelValues(deploymentName), start)
2018-03-15 15:33:28 +00:00
// Check deployment still exists
2019-11-04 07:49:24 +00:00
updated, err := d.deps.DatabaseCRCli.DatabaseV1().ArangoDeployments(d.apiObject.GetNamespace()).Get(deploymentName, metav1.GetOptions{})
if k8sutil.IsNotFound(err) {
// Deployment is gone
log.Info().Msg("Deployment is gone")
d.Delete()
return nextInterval
} else if updated != nil && updated.GetDeletionTimestamp() != nil {
// Deployment is marked for deletion
if err := d.runDeploymentFinalizers(ctx); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("ArangoDeployment finalizer inspection failed", err, d.apiObject))
}
} else {
2020-01-27 06:33:12 +00:00
// Check if maintenance annotation is set
if updated != nil && updated.Annotations != nil {
2020-03-11 07:57:03 +00:00
if v, ok := updated.Annotations[deployment.ArangoDeploymentPodMaintenanceAnnotation]; ok && v == "true" {
2020-01-27 06:33:12 +00:00
// Disable checks if we will enter maintenance mode
log.Info().Str("deployment", deploymentName).Msg("Deployment in maintenance mode")
return nextInterval
}
}
// Is the deployment in failed state, if so, give up.
2018-06-12 09:09:42 +00:00
if d.GetPhase() == api.DeploymentPhaseFailed {
log.Debug().Msg("Deployment is in Failed state.")
return nextInterval
}
// Inspect secret hashes
if err := d.resources.ValidateSecretHashes(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Secret hash validation failed", err, d.apiObject))
}
2018-12-03 11:06:10 +00:00
// Check for LicenseKeySecret
if err := d.resources.ValidateLicenseKeySecret(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("License Key Secret invalid", err, d.apiObject))
}
// Is the deployment in a good state?
2018-06-12 09:09:42 +00:00
status, _ := d.GetStatus()
if status.Conditions.IsTrue(api.ConditionTypeSecretsChanged) {
log.Debug().Msg("Condition SecretsChanged is true. Revert secrets before we can continue")
return nextInterval
}
// Ensure we have image info
if retrySoon, err := d.ensureImages(d.apiObject); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Image detection failed", err, d.apiObject))
} else if retrySoon {
nextInterval = minInspectionInterval
}
2018-03-20 12:10:52 +00:00
// Inspection of generated resources needed
2018-08-25 10:08:44 +00:00
if x, err := d.resources.InspectPods(ctx); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Pod inspection failed", err, d.apiObject))
2018-08-25 10:08:44 +00:00
} else {
2018-08-30 14:57:08 +00:00
nextInterval = nextInterval.ReduceTo(x)
}
if x, err := d.resources.InspectPVCs(ctx); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("PVC inspection failed", err, d.apiObject))
} else {
2018-08-30 14:57:08 +00:00
nextInterval = nextInterval.ReduceTo(x)
}
2018-03-15 15:33:28 +00:00
// Check members for resilience
if err := d.resilience.CheckMemberFailure(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Member failure detection failed", err, d.apiObject))
}
// Immediate actions
if err := d.reconciler.CheckDeployment(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Reconciler immediate actions failed", err, d.apiObject))
}
// Create scale/update plan
2018-09-07 11:18:39 +00:00
if err := d.reconciler.CreatePlan(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Plan creation failed", err, d.apiObject))
}
// Execute current step of scale/update plan
retrySoon, err := d.reconciler.ExecutePlan(ctx)
if err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Plan execution failed", err, d.apiObject))
}
if retrySoon {
nextInterval = minInspectionInterval
}
2018-03-15 15:33:28 +00:00
// Ensure all resources are created
2018-08-31 14:08:21 +00:00
if err := d.resources.EnsureSecrets(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Secret creation failed", err, d.apiObject))
}
if err := d.resources.EnsureServices(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Service creation failed", err, d.apiObject))
}
2019-05-16 08:43:02 +00:00
if d.haveServiceMonitorCRD {
if err := d.resources.EnsureServiceMonitor(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Service monitor creation failed", err, d.apiObject))
}
}
2018-08-31 14:08:21 +00:00
if err := d.resources.EnsurePVCs(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("PVC creation failed", err, d.apiObject))
}
if err := d.resources.EnsurePods(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Pod creation failed", err, d.apiObject))
}
if err := d.resources.EnsurePDBs(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("PDB creation failed", err, d.apiObject))
}
2018-03-15 15:33:28 +00:00
2019-11-11 13:11:27 +00:00
if err := d.resources.EnsureAnnotations(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Annotation update failed", err, d.apiObject))
}
// Create access packages
2018-08-31 14:08:21 +00:00
if err := d.createAccessPackages(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("AccessPackage creation failed", err, d.apiObject))
}
2018-05-31 11:20:49 +00:00
// Ensure deployment bootstrap
if err := d.EnsureBootstrap(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Bootstrap failed", err, d.apiObject))
}
// Inspect deployment for obsolete members
if err := d.resources.CleanupRemovedMembers(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Removed member cleanup failed", err, d.apiObject))
}
if err := d.backup.CheckRestore(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Restore operation failed", err, d.apiObject))
}
// At the end of the inspect, we cleanup terminated pods.
2018-08-30 14:57:08 +00:00
if x, err := d.resources.CleanupTerminatedPods(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Pod cleanup failed", err, d.apiObject))
2018-08-30 14:57:08 +00:00
} else {
nextInterval = nextInterval.ReduceTo(x)
}
2018-03-26 11:35:00 +00:00
}
2018-03-15 15:33:28 +00:00
// Update next interval (on errors)
if hasError {
if d.recentInspectionErrors == 0 {
nextInterval = minInspectionInterval
d.recentInspectionErrors++
}
} else {
d.recentInspectionErrors = 0
}
2018-08-30 14:57:08 +00:00
return nextInterval.ReduceTo(maxInspectionInterval)
2018-03-15 15:33:28 +00:00
}
// triggerInspection ensures that an inspection is run soon.
// It does so by firing d.inspectTrigger; the consumer of that trigger is not
// part of this function (presumably the deployment's inspection loop — verify
// against the code that waits on inspectTrigger).
func (d *Deployment) triggerInspection() {
d.inspectTrigger.Trigger()
}
2019-05-16 08:43:02 +00:00
// triggerCRDInspection fires d.inspectCRDTrigger, the trigger that is separate
// from the one used by triggerInspection.
// NOTE(review): presumably this requests a re-check of the available CRDs
// (cf. haveServiceMonitorCRD) rather than a regular deployment inspection —
// confirm against the code that waits on inspectCRDTrigger.
func (d *Deployment) triggerCRDInspection() {
d.inspectCRDTrigger.Trigger()
}