mirror of
https://github.com/arangodb/kube-arangodb.git
synced 2024-12-14 11:57:37 +00:00
[Feature] Parametrize Scheduling Graceful Duration (#1641)
This commit is contained in:
parent
a4d7331a0c
commit
1d86f4ee29
6 changed files with 39 additions and 1 deletions
|
@ -20,6 +20,7 @@
|
|||
- (Maintenance) Update Go to 1.22.2
|
||||
- (Feature) Object Checksum
|
||||
- (Bugfix) Use Rendered Spec in case of scheduling compare
|
||||
- (Feature) Parametrize Scheduling Graceful Duration
|
||||
|
||||
## [1.2.39](https://github.com/arangodb/kube-arangodb/tree/1.2.39) (2024-03-11)
|
||||
- (Feature) Extract Scheduler API
|
||||
|
|
|
@ -198,6 +198,7 @@ Flags:
|
|||
--timeout.backup-upload duration The request timeout to the ArangoDB during uploading files (default 5m0s)
|
||||
--timeout.force-delete-pod-grace-period duration Default period when ArangoDB Pod should be forcefully removed after all containers were stopped - set to 0 to disable forceful removals (default 15m0s)
|
||||
--timeout.k8s duration The request timeout to the kubernetes (default 2s)
|
||||
--timeout.pod-scheduling-grace-period duration Default period when ArangoDB Pod should be deleted in case of scheduling info change - set to 0 to disable (default 15s)
|
||||
--timeout.reconciliation duration The reconciliation timeout to the ArangoDB CR (default 1m0s)
|
||||
--timeout.shard-rebuild duration Timeout after which particular out-synced shard is considered as failed and rebuild is triggered (default 1h0m0s)
|
||||
--timeout.shard-rebuild-retry duration Timeout after which rebuild shards retry flow is triggered (default 4h0m0s)
|
||||
|
|
|
@ -157,6 +157,7 @@ var (
|
|||
backupArangoD time.Duration
|
||||
backupUploadArangoD time.Duration
|
||||
forcePodDeletionGracePeriod time.Duration
|
||||
podSchedulingGracePeriod time.Duration
|
||||
}
|
||||
operatorImageDiscovery struct {
|
||||
timeout time.Duration
|
||||
|
@ -226,6 +227,7 @@ func init() {
|
|||
f.DurationVar(&operatorTimeouts.backupArangoD, "timeout.backup-arangod", globals.BackupDefaultArangoClientTimeout, "The request timeout to the ArangoDB during backup calls")
|
||||
f.DurationVar(&operatorTimeouts.backupUploadArangoD, "timeout.backup-upload", globals.BackupUploadArangoClientTimeout, "The request timeout to the ArangoDB during uploading files")
|
||||
f.DurationVar(&operatorTimeouts.forcePodDeletionGracePeriod, "timeout.force-delete-pod-grace-period", globals.DefaultForcePodDeletionGracePeriodTimeout, "Default period when ArangoDB Pod should be forcefully removed after all containers were stopped - set to 0 to disable forceful removals")
|
||||
f.DurationVar(&operatorTimeouts.podSchedulingGracePeriod, "timeout.pod-scheduling-grace-period", globals.DefaultPodSchedulingGracePeriod, "Default period when ArangoDB Pod should be deleted in case of scheduling info change - set to 0 to disable")
|
||||
f.DurationVar(&shutdownOptions.delay, "shutdown.delay", defaultShutdownDelay, "The delay before running shutdown handlers")
|
||||
f.DurationVar(&shutdownOptions.timeout, "shutdown.timeout", defaultShutdownTimeout, "Timeout for shutdown handlers")
|
||||
f.DurationVar(&operatorReconciliationRetry.delay, "operator.reconciliation.retry.delay", globals.DefaultOperatorUpdateRetryDelay, "Delay between Object Update operations in the Reconciliation loop")
|
||||
|
@ -294,6 +296,7 @@ func executeMain(cmd *cobra.Command, args []string) {
|
|||
globals.GetGlobalTimeouts().BackupArangoClientTimeout().Set(operatorTimeouts.backupArangoD)
|
||||
globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().Set(operatorTimeouts.backupUploadArangoD)
|
||||
globals.GetGlobalTimeouts().ForcePodDeletionGracePeriodTimeout().Set(operatorTimeouts.forcePodDeletionGracePeriod)
|
||||
globals.GetGlobalTimeouts().PodSchedulingGracePeriod().Set(operatorTimeouts.podSchedulingGracePeriod)
|
||||
|
||||
globals.GetGlobals().Retry().OperatorUpdateRetryDelay().Set(operatorReconciliationRetry.delay)
|
||||
globals.GetGlobals().Retry().OperatorUpdateRetryCount().Set(operatorReconciliationRetry.count)
|
||||
|
|
|
@ -23,12 +23,14 @@ package reconcile
|
|||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
"time"
|
||||
|
||||
core "k8s.io/api/core/v1"
|
||||
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
"github.com/arangodb/kube-arangodb/pkg/deployment/actions"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/globals"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
)
|
||||
|
||||
|
@ -38,6 +40,12 @@ func (r *Reconciler) createMemberPodSchedulingFailurePlan(ctx context.Context,
|
|||
_ k8sutil.APIObject, spec api.DeploymentSpec, status api.DeploymentStatus, context PlanBuilderContext) api.Plan {
|
||||
|
||||
var p api.Plan
|
||||
|
||||
if globals.GetGlobalTimeouts().PodSchedulingGracePeriod().Get() == 0 {
|
||||
// Scheduling grace period is not enabled
|
||||
return nil
|
||||
}
|
||||
|
||||
if !status.Conditions.IsTrue(api.ConditionTypePodSchedulingFailure) {
|
||||
return p
|
||||
}
|
||||
|
@ -55,6 +63,19 @@ func (r *Reconciler) createMemberPodSchedulingFailurePlan(ctx context.Context,
|
|||
continue
|
||||
}
|
||||
|
||||
if c, ok := m.Member.Conditions.Get(api.ConditionTypeScheduled); !ok {
|
||||
// Action can't proceed if pod is not scheduled
|
||||
continue
|
||||
} else if c.LastTransitionTime.IsZero() {
|
||||
// LastTransitionTime is not set
|
||||
continue
|
||||
} else {
|
||||
if time.Since(c.LastTransitionTime.Time) <= globals.GetGlobalTimeouts().PodSchedulingGracePeriod().Get() {
|
||||
// In grace period
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
imageInfo, imageFound := context.SelectImageForMember(spec, status, m.Member)
|
||||
if !imageFound {
|
||||
l.Warn("could not find image for already created member")
|
||||
|
|
|
@ -393,6 +393,11 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
|
|||
nextInterval = nextInterval.ReduceTo(recheckSoonPodInspectorInterval)
|
||||
}
|
||||
} else {
|
||||
if memberStatus.Conditions.Update(api.ConditionTypeScheduled, false, "Pod is not scheduled", "") {
|
||||
updateMemberStatusNeeded = true
|
||||
nextInterval = nextInterval.ReduceTo(recheckSoonPodInspectorInterval)
|
||||
}
|
||||
|
||||
if k8sutil.IsPodNotScheduledFor(pod, podScheduleTimeout) {
|
||||
// Pod cannot be scheduled for too long
|
||||
log.Str("pod-name", pod.GetName()).Debug("Pod scheduling timeout")
|
||||
|
|
|
@ -29,6 +29,7 @@ const (
|
|||
DefaultArangoDCheckTimeout = time.Second * 2
|
||||
DefaultReconciliationTimeout = time.Minute
|
||||
DefaultForcePodDeletionGracePeriodTimeout = 15 * time.Minute
|
||||
DefaultPodSchedulingGracePeriod = 15 * time.Second
|
||||
|
||||
BackupDefaultArangoClientTimeout = 30 * time.Second
|
||||
BackupUploadArangoClientTimeout = 300 * time.Second
|
||||
|
@ -61,6 +62,7 @@ var globalObj = &globals{
|
|||
backupArangoClientTimeout: NewTimeout(BackupDefaultArangoClientTimeout),
|
||||
backupArangoClientUploadTimeout: NewTimeout(BackupUploadArangoClientTimeout),
|
||||
forcePodDeletionGracePeriodTimeout: NewTimeout(DefaultForcePodDeletionGracePeriodTimeout),
|
||||
podSchedulingGracePeriod: NewTimeout(DefaultPodSchedulingGracePeriod),
|
||||
},
|
||||
kubernetes: &globalKubernetes{
|
||||
requestBatchSize: NewInt64(DefaultKubernetesRequestBatchSize),
|
||||
|
@ -147,6 +149,7 @@ type GlobalTimeouts interface {
|
|||
Agency() Timeout
|
||||
|
||||
ForcePodDeletionGracePeriodTimeout() Timeout
|
||||
PodSchedulingGracePeriod() Timeout
|
||||
|
||||
BackupArangoClientTimeout() Timeout
|
||||
BackupArangoClientUploadTimeout() Timeout
|
||||
|
@ -156,13 +159,17 @@ type globalTimeouts struct {
|
|||
requests, arangod, reconciliation, arangodCheck, agency, shardRebuild, shardRebuildRetry Timeout
|
||||
backupArangoClientTimeout Timeout
|
||||
backupArangoClientUploadTimeout Timeout
|
||||
forcePodDeletionGracePeriodTimeout Timeout
|
||||
forcePodDeletionGracePeriodTimeout, podSchedulingGracePeriod Timeout
|
||||
}
|
||||
|
||||
// ForcePodDeletionGracePeriodTimeout returns the Timeout stored in the
// forcePodDeletionGracePeriodTimeout field of g.
func (g *globalTimeouts) ForcePodDeletionGracePeriodTimeout() Timeout {
|
||||
return g.forcePodDeletionGracePeriodTimeout
|
||||
}
|
||||
|
||||
// PodSchedulingGracePeriod returns the Timeout stored in the
// podSchedulingGracePeriod field of g.
func (g *globalTimeouts) PodSchedulingGracePeriod() Timeout {
|
||||
return g.podSchedulingGracePeriod
|
||||
}
|
||||
|
||||
// Agency returns the Timeout stored in the agency field of g.
func (g *globalTimeouts) Agency() Timeout {
|
||||
return g.agency
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue