1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Feature] Parametrize ForceDelete timeout (#1632)

This commit is contained in:
Adam Janikowski 2024-03-25 13:57:16 +01:00 committed by GitHub
parent 386efa1818
commit cbb16bce9a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 48 additions and 33 deletions

View file

@ -13,6 +13,7 @@
- (Bugfix) Remove ImagePullSecrets Reference from Container
- (Feature) DebugPackage ArangoProfiles
- (Feature) Scheduler CLI
- (Feature) Parametrize ForceDelete timeout
## [1.2.39](https://github.com/arangodb/kube-arangodb/tree/1.2.39) (2024-03-11)
- (Feature) Extract Scheduler API

View file

@ -168,7 +168,7 @@ Flags:
--kubernetes.max-batch-size int Size of batch during objects read (default 256)
--kubernetes.qps float32 Number of queries per second for k8s API (default 15)
--log.format string Set log format. Allowed values: 'pretty', 'JSON'. If empty, default format is used (default "pretty")
--log.level stringArray Set log levels in format <level> or <logger>=<level>. Possible loggers: action, agency, api-server, assertion, backup-operator, chaos-monkey, crd, deployment, deployment-ci, deployment-reconcile, deployment-replication, deployment-resilience, deployment-resources, deployment-storage, deployment-storage-pc, deployment-storage-service, http, inspector, integrations, k8s-client, ml-batchjob-operator, ml-cronjob-operator, ml-extension-operator, ml-extension-shutdown, ml-storage-operator, monitor, operator, operator-arangojob-handler, operator-v2, operator-v2-event, operator-v2-worker, panics, pod_compare, root, root-event-recorder, server, server-authentication (default [info])
--log.level stringArray Set log levels in format <level> or <logger>=<level>. Possible loggers: action, agency, api-server, assertion, backup-operator, chaos-monkey, crd, deployment, deployment-ci, deployment-reconcile, deployment-replication, deployment-resilience, deployment-resources, deployment-storage, deployment-storage-pc, deployment-storage-service, http, inspector, integrations, k8s-client, ml-batchjob-operator, ml-cronjob-operator, ml-extension-operator, ml-extension-shutdown, ml-storage-operator, monitor, operator, operator-arangojob-handler, operator-v2, operator-v2-event, operator-v2-worker, panics, pod_compare, root, root-event-recorder, scheduler, server, server-authentication (default [info])
--log.sampling If true, operator will try to minimize duplication of logging events (default true)
--memory-limit uint Define memory limit for hard shutdown and the dump of goroutines. Used for testing
--metrics.excluded-prefixes stringArray List of the excluded metrics prefixes
@ -196,6 +196,7 @@ Flags:
--timeout.arangod-check duration The version check request timeout to the ArangoDB (default 2s)
--timeout.backup-arangod duration The request timeout to the ArangoDB during backup calls (default 30s)
--timeout.backup-upload duration The request timeout to the ArangoDB during uploading files (default 5m0s)
--timeout.force-delete-pod-grace-period duration Default period when ArangoDB Pod should be forcefully removed after all containers were stopped - set to 0 to disable forceful removals (default 15m0s)
--timeout.k8s duration The request timeout to the kubernetes (default 2s)
--timeout.reconciliation duration The reconciliation timeout to the ArangoDB CR (default 1m0s)
--timeout.shard-rebuild duration Timeout after which particular out-synced shard is considered as failed and rebuild is triggered (default 1h0m0s)

View file

@ -156,6 +156,7 @@ var (
shardRebuildRetry time.Duration
backupArangoD time.Duration
backupUploadArangoD time.Duration
forcePodDeletionGracePeriod time.Duration
}
operatorImageDiscovery struct {
timeout time.Duration
@ -224,6 +225,7 @@ func init() {
f.DurationVar(&operatorTimeouts.shardRebuildRetry, "timeout.shard-rebuild-retry", globals.DefaultOutSyncedShardRebuildRetryTimeout, "Timeout after which rebuild shards retry flow is triggered")
f.DurationVar(&operatorTimeouts.backupArangoD, "timeout.backup-arangod", globals.BackupDefaultArangoClientTimeout, "The request timeout to the ArangoDB during backup calls")
f.DurationVar(&operatorTimeouts.backupUploadArangoD, "timeout.backup-upload", globals.BackupUploadArangoClientTimeout, "The request timeout to the ArangoDB during uploading files")
f.DurationVar(&operatorTimeouts.forcePodDeletionGracePeriod, "timeout.force-delete-pod-grace-period", globals.DefaultForcePodDeletionGracePeriodTimeout, "Default period when ArangoDB Pod should be forcefully removed after all containers were stopped - set to 0 to disable forceful removals")
f.DurationVar(&shutdownOptions.delay, "shutdown.delay", defaultShutdownDelay, "The delay before running shutdown handlers")
f.DurationVar(&shutdownOptions.timeout, "shutdown.timeout", defaultShutdownTimeout, "Timeout for shutdown handlers")
f.DurationVar(&operatorReconciliationRetry.delay, "operator.reconciliation.retry.delay", globals.DefaultOperatorUpdateRetryDelay, "Delay between Object Update operations in the Reconciliation loop")
@ -291,6 +293,7 @@ func executeMain(cmd *cobra.Command, args []string) {
globals.GetGlobalTimeouts().ShardRebuildRetry().Set(operatorTimeouts.shardRebuildRetry)
globals.GetGlobalTimeouts().BackupArangoClientTimeout().Set(operatorTimeouts.backupArangoD)
globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().Set(operatorTimeouts.backupUploadArangoD)
globals.GetGlobalTimeouts().ForcePodDeletionGracePeriodTimeout().Set(operatorTimeouts.forcePodDeletionGracePeriod)
globals.GetGlobals().Retry().OperatorUpdateRetryDelay().Set(operatorReconciliationRetry.delay)
globals.GetGlobals().Retry().OperatorUpdateRetryCount().Set(operatorReconciliationRetry.count)

View file

@ -57,7 +57,6 @@ const (
// we will mark the pod as scheduled for termination
recheckSoonPodInspectorInterval = util.Interval(time.Second) // Time between Pod inspection if we think something will change soon
maxPodInspectorInterval = util.Interval(time.Hour) // Maximum time between Pod inspection (if nothing else happens)
forcePodDeletionGracePeriod = 15 * time.Minute
)
func (r *Resources) handleRestartedPod(pod *core.Pod, memberStatus *api.MemberStatus, wasTerminated, markAsTerminated *bool) {
@ -426,15 +425,17 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
var gps int64 = 10
forceDelete := false
if gracePeriod := globals.GetGlobalTimeouts().ForcePodDeletionGracePeriodTimeout().Get(); gracePeriod > 0 {
if t := k8sutil.PodStopTime(pod); !t.IsZero() {
if time.Since(t) > forcePodDeletionGracePeriod {
if time.Since(t) > gracePeriod {
forceDelete = true
}
} else if t := pod.DeletionTimestamp; t != nil {
if time.Since(t.Time) > forcePodDeletionGracePeriod {
if time.Since(t.Time) > gracePeriod {
forceDelete = true
}
}
}
if forceDelete {
gps = 0

View file

@ -1,7 +1,7 @@
//
// DISCLAIMER
//
// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany
// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -28,6 +28,7 @@ const (
DefaultArangoDAgencyTimeout = time.Second * 10
DefaultArangoDCheckTimeout = time.Second * 2
DefaultReconciliationTimeout = time.Minute
DefaultForcePodDeletionGracePeriodTimeout = 15 * time.Minute
BackupDefaultArangoClientTimeout = 30 * time.Second
BackupUploadArangoClientTimeout = 300 * time.Second
@ -59,6 +60,7 @@ var globalObj = &globals{
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
backupArangoClientTimeout: NewTimeout(BackupDefaultArangoClientTimeout),
backupArangoClientUploadTimeout: NewTimeout(BackupUploadArangoClientTimeout),
forcePodDeletionGracePeriodTimeout: NewTimeout(DefaultForcePodDeletionGracePeriodTimeout),
},
kubernetes: &globalKubernetes{
requestBatchSize: NewInt64(DefaultKubernetesRequestBatchSize),
@ -144,6 +146,8 @@ type GlobalTimeouts interface {
ArangoDCheck() Timeout
Agency() Timeout
ForcePodDeletionGracePeriodTimeout() Timeout
BackupArangoClientTimeout() Timeout
BackupArangoClientUploadTimeout() Timeout
}
@ -152,6 +156,11 @@ type globalTimeouts struct {
requests, arangod, reconciliation, arangodCheck, agency, shardRebuild, shardRebuildRetry Timeout
backupArangoClientTimeout Timeout
backupArangoClientUploadTimeout Timeout
forcePodDeletionGracePeriodTimeout Timeout
}
// ForcePodDeletionGracePeriodTimeout returns the Timeout holding the grace
// period after which a stopped or terminating Pod is forcefully deleted.
// Pod inspection only applies forceful deletion when the stored value is
// greater than zero, so setting it to 0 disables forceful removals.
func (g *globalTimeouts) ForcePodDeletionGracePeriodTimeout() Timeout {
return g.forcePodDeletionGracePeriodTimeout
}
func (g *globalTimeouts) Agency() Timeout {