1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Feature] Parametrize ForceDelete timeout (#1632)

This commit is contained in:
Adam Janikowski 2024-03-25 13:57:16 +01:00 committed by GitHub
parent 386efa1818
commit cbb16bce9a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 48 additions and 33 deletions

View file

@ -13,6 +13,7 @@
- (Bugfix) Remove ImagePullSecrets Reference from Container
- (Feature) DebugPackage ArangoProfiles
- (Feature) Scheduler CLI
- (Feature) Parametrize ForceDelete timeout
## [1.2.39](https://github.com/arangodb/kube-arangodb/tree/1.2.39) (2024-03-11)
- (Feature) Extract Scheduler API

View file

@ -168,7 +168,7 @@ Flags:
--kubernetes.max-batch-size int Size of batch during objects read (default 256)
--kubernetes.qps float32 Number of queries per second for k8s API (default 15)
--log.format string Set log format. Allowed values: 'pretty', 'JSON'. If empty, default format is used (default "pretty")
--log.level stringArray Set log levels in format <level> or <logger>=<level>. Possible loggers: action, agency, api-server, assertion, backup-operator, chaos-monkey, crd, deployment, deployment-ci, deployment-reconcile, deployment-replication, deployment-resilience, deployment-resources, deployment-storage, deployment-storage-pc, deployment-storage-service, http, inspector, integrations, k8s-client, ml-batchjob-operator, ml-cronjob-operator, ml-extension-operator, ml-extension-shutdown, ml-storage-operator, monitor, operator, operator-arangojob-handler, operator-v2, operator-v2-event, operator-v2-worker, panics, pod_compare, root, root-event-recorder, server, server-authentication (default [info])
--log.level stringArray Set log levels in format <level> or <logger>=<level>. Possible loggers: action, agency, api-server, assertion, backup-operator, chaos-monkey, crd, deployment, deployment-ci, deployment-reconcile, deployment-replication, deployment-resilience, deployment-resources, deployment-storage, deployment-storage-pc, deployment-storage-service, http, inspector, integrations, k8s-client, ml-batchjob-operator, ml-cronjob-operator, ml-extension-operator, ml-extension-shutdown, ml-storage-operator, monitor, operator, operator-arangojob-handler, operator-v2, operator-v2-event, operator-v2-worker, panics, pod_compare, root, root-event-recorder, scheduler, server, server-authentication (default [info])
--log.sampling If true, operator will try to minimize duplication of logging events (default true)
--memory-limit uint Define memory limit for hard shutdown and the dump of goroutines. Used for testing
--metrics.excluded-prefixes stringArray List of the excluded metrics prefixes
@ -196,6 +196,7 @@ Flags:
--timeout.arangod-check duration The version check request timeout to the ArangoDB (default 2s)
--timeout.backup-arangod duration The request timeout to the ArangoDB during backup calls (default 30s)
--timeout.backup-upload duration The request timeout to the ArangoDB during uploading files (default 5m0s)
--timeout.force-delete-pod-grace-period duration Default period when ArangoDB Pod should be forcefully removed after all containers were stopped - set to 0 to disable forceful removals (default 15m0s)
--timeout.k8s duration The request timeout to the kubernetes (default 2s)
--timeout.reconciliation duration The reconciliation timeout to the ArangoDB CR (default 1m0s)
--timeout.shard-rebuild duration Timeout after which particular out-synced shard is considered as failed and rebuild is triggered (default 1h0m0s)

View file

@ -147,15 +147,16 @@ var (
concurrentUploads int
}
operatorTimeouts struct {
k8s time.Duration
arangoD time.Duration
arangoDCheck time.Duration
reconciliation time.Duration
agency time.Duration
shardRebuild time.Duration
shardRebuildRetry time.Duration
backupArangoD time.Duration
backupUploadArangoD time.Duration
k8s time.Duration
arangoD time.Duration
arangoDCheck time.Duration
reconciliation time.Duration
agency time.Duration
shardRebuild time.Duration
shardRebuildRetry time.Duration
backupArangoD time.Duration
backupUploadArangoD time.Duration
forcePodDeletionGracePeriod time.Duration
}
operatorImageDiscovery struct {
timeout time.Duration
@ -224,6 +225,7 @@ func init() {
f.DurationVar(&operatorTimeouts.shardRebuildRetry, "timeout.shard-rebuild-retry", globals.DefaultOutSyncedShardRebuildRetryTimeout, "Timeout after which rebuild shards retry flow is triggered")
f.DurationVar(&operatorTimeouts.backupArangoD, "timeout.backup-arangod", globals.BackupDefaultArangoClientTimeout, "The request timeout to the ArangoDB during backup calls")
f.DurationVar(&operatorTimeouts.backupUploadArangoD, "timeout.backup-upload", globals.BackupUploadArangoClientTimeout, "The request timeout to the ArangoDB during uploading files")
f.DurationVar(&operatorTimeouts.forcePodDeletionGracePeriod, "timeout.force-delete-pod-grace-period", globals.DefaultForcePodDeletionGracePeriodTimeout, "Default period when ArangoDB Pod should be forcefully removed after all containers were stopped - set to 0 to disable forceful removals")
f.DurationVar(&shutdownOptions.delay, "shutdown.delay", defaultShutdownDelay, "The delay before running shutdown handlers")
f.DurationVar(&shutdownOptions.timeout, "shutdown.timeout", defaultShutdownTimeout, "Timeout for shutdown handlers")
f.DurationVar(&operatorReconciliationRetry.delay, "operator.reconciliation.retry.delay", globals.DefaultOperatorUpdateRetryDelay, "Delay between Object Update operations in the Reconciliation loop")
@ -291,6 +293,7 @@ func executeMain(cmd *cobra.Command, args []string) {
globals.GetGlobalTimeouts().ShardRebuildRetry().Set(operatorTimeouts.shardRebuildRetry)
globals.GetGlobalTimeouts().BackupArangoClientTimeout().Set(operatorTimeouts.backupArangoD)
globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().Set(operatorTimeouts.backupUploadArangoD)
globals.GetGlobalTimeouts().ForcePodDeletionGracePeriodTimeout().Set(operatorTimeouts.forcePodDeletionGracePeriod)
globals.GetGlobals().Retry().OperatorUpdateRetryDelay().Set(operatorReconciliationRetry.delay)
globals.GetGlobals().Retry().OperatorUpdateRetryCount().Set(operatorReconciliationRetry.count)

View file

@ -57,7 +57,6 @@ const (
// we will mark the pod as scheduled for termination
recheckSoonPodInspectorInterval = util.Interval(time.Second) // Time between Pod inspection if we think something will change soon
maxPodInspectorInterval = util.Interval(time.Hour) // Maximum time between Pod inspection (if nothing else happens)
forcePodDeletionGracePeriod = 15 * time.Minute
)
func (r *Resources) handleRestartedPod(pod *core.Pod, memberStatus *api.MemberStatus, wasTerminated, markAsTerminated *bool) {
@ -426,13 +425,15 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
var gps int64 = 10
forceDelete := false
if t := k8sutil.PodStopTime(pod); !t.IsZero() {
if time.Since(t) > forcePodDeletionGracePeriod {
forceDelete = true
}
} else if t := pod.DeletionTimestamp; t != nil {
if time.Since(t.Time) > forcePodDeletionGracePeriod {
forceDelete = true
if gracePeriod := globals.GetGlobalTimeouts().ForcePodDeletionGracePeriodTimeout().Get(); gracePeriod > 0 {
if t := k8sutil.PodStopTime(pod); !t.IsZero() {
if time.Since(t) > gracePeriod {
forceDelete = true
}
} else if t := pod.DeletionTimestamp; t != nil {
if time.Since(t.Time) > gracePeriod {
forceDelete = true
}
}
}

View file

@ -1,7 +1,7 @@
//
// DISCLAIMER
//
// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany
// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -23,11 +23,12 @@ package globals
import "time"
const (
DefaultKubernetesTimeout = 2 * time.Second
DefaultArangoDTimeout = time.Second * 5
DefaultArangoDAgencyTimeout = time.Second * 10
DefaultArangoDCheckTimeout = time.Second * 2
DefaultReconciliationTimeout = time.Minute
DefaultKubernetesTimeout = 2 * time.Second
DefaultArangoDTimeout = time.Second * 5
DefaultArangoDAgencyTimeout = time.Second * 10
DefaultArangoDCheckTimeout = time.Second * 2
DefaultReconciliationTimeout = time.Minute
DefaultForcePodDeletionGracePeriodTimeout = 15 * time.Minute
BackupDefaultArangoClientTimeout = 30 * time.Second
BackupUploadArangoClientTimeout = 300 * time.Second
@ -50,15 +51,16 @@ const (
var globalObj = &globals{
timeouts: &globalTimeouts{
requests: NewTimeout(DefaultKubernetesTimeout),
arangod: NewTimeout(DefaultArangoDTimeout),
arangodCheck: NewTimeout(DefaultArangoDCheckTimeout),
reconciliation: NewTimeout(DefaultReconciliationTimeout),
agency: NewTimeout(DefaultArangoDAgencyTimeout),
shardRebuild: NewTimeout(DefaultOutSyncedShardRebuildTimeout),
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
backupArangoClientTimeout: NewTimeout(BackupDefaultArangoClientTimeout),
backupArangoClientUploadTimeout: NewTimeout(BackupUploadArangoClientTimeout),
requests: NewTimeout(DefaultKubernetesTimeout),
arangod: NewTimeout(DefaultArangoDTimeout),
arangodCheck: NewTimeout(DefaultArangoDCheckTimeout),
reconciliation: NewTimeout(DefaultReconciliationTimeout),
agency: NewTimeout(DefaultArangoDAgencyTimeout),
shardRebuild: NewTimeout(DefaultOutSyncedShardRebuildTimeout),
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
backupArangoClientTimeout: NewTimeout(BackupDefaultArangoClientTimeout),
backupArangoClientUploadTimeout: NewTimeout(BackupUploadArangoClientTimeout),
forcePodDeletionGracePeriodTimeout: NewTimeout(DefaultForcePodDeletionGracePeriodTimeout),
},
kubernetes: &globalKubernetes{
requestBatchSize: NewInt64(DefaultKubernetesRequestBatchSize),
@ -144,6 +146,8 @@ type GlobalTimeouts interface {
ArangoDCheck() Timeout
Agency() Timeout
ForcePodDeletionGracePeriodTimeout() Timeout
BackupArangoClientTimeout() Timeout
BackupArangoClientUploadTimeout() Timeout
}
@ -152,6 +156,11 @@ type globalTimeouts struct {
requests, arangod, reconciliation, arangodCheck, agency, shardRebuild, shardRebuildRetry Timeout
backupArangoClientTimeout Timeout
backupArangoClientUploadTimeout Timeout
forcePodDeletionGracePeriodTimeout Timeout
}
// ForcePodDeletionGracePeriodTimeout returns the Timeout that holds the grace
// period after which a stopped or deleted Pod may be forcefully removed.
//
// The operator populates it from the --timeout.force-delete-pod-grace-period
// flag; pod inspection only force-deletes when the stored value is greater
// than zero, so a value of 0 disables forceful removals.
func (g *globalTimeouts) ForcePodDeletionGracePeriodTimeout() Timeout {
return g.forcePodDeletionGracePeriodTimeout
}
func (g *globalTimeouts) Agency() Timeout {