mirror of
https://github.com/arangodb/kube-arangodb.git
synced 2024-12-14 11:57:37 +00:00
GT-313 Fix for ContextExceeded error during backup upload (#1263)
This commit is contained in:
parent
0cd5a2a65f
commit
a3206f0c58
7 changed files with 53 additions and 33 deletions
|
@ -7,6 +7,7 @@
|
|||
- (Bugfix) Fix agency timeout
|
||||
- (Improvement) Extract Agency Timeout
|
||||
- (Feature) Rebalancer V2
|
||||
- (Bugfix) Fix for ContextExceeded error during backup upload
|
||||
|
||||
## [1.2.30](https://github.com/arangodb/kube-arangodb/tree/1.2.30) (2023-06-16)
|
||||
- (Feature) AgencyCache Interface
|
||||
|
|
21
cmd/cmd.go
21
cmd/cmd.go
|
@ -155,13 +155,15 @@ var (
|
|||
concurrentUploads int
|
||||
}
|
||||
operatorTimeouts struct {
|
||||
k8s time.Duration
|
||||
arangoD time.Duration
|
||||
arangoDCheck time.Duration
|
||||
reconciliation time.Duration
|
||||
agency time.Duration
|
||||
shardRebuild time.Duration
|
||||
shardRebuildRetry time.Duration
|
||||
k8s time.Duration
|
||||
arangoD time.Duration
|
||||
arangoDCheck time.Duration
|
||||
reconciliation time.Duration
|
||||
agency time.Duration
|
||||
shardRebuild time.Duration
|
||||
shardRebuildRetry time.Duration
|
||||
backupArangoD time.Duration
|
||||
backupUploadArangoD time.Duration
|
||||
}
|
||||
chaosOptions struct {
|
||||
allowed bool
|
||||
|
@ -216,6 +218,8 @@ func init() {
|
|||
f.DurationVar(&operatorTimeouts.reconciliation, "timeout.reconciliation", globals.DefaultReconciliationTimeout, "The reconciliation timeout to the ArangoDB CR")
|
||||
f.DurationVar(&operatorTimeouts.shardRebuild, "timeout.shard-rebuild", globals.DefaultOutSyncedShardRebuildTimeout, "Timeout after which particular out-synced shard is considered as failed and rebuild is triggered")
|
||||
f.DurationVar(&operatorTimeouts.shardRebuildRetry, "timeout.shard-rebuild-retry", globals.DefaultOutSyncedShardRebuildRetryTimeout, "Timeout after which rebuild shards retry flow is triggered")
|
||||
f.DurationVar(&operatorTimeouts.backupArangoD, "timeout.backup-arangod", globals.BackupDefaultArangoClientTimeout, "The request timeout to the ArangoDB during backup calls")
|
||||
f.DurationVar(&operatorTimeouts.backupUploadArangoD, "timeout.backup-upload", globals.BackupUploadArangoClientTimeout, "The request timeout to the ArangoDB during uploading files")
|
||||
f.DurationVar(&shutdownOptions.delay, "shutdown.delay", defaultShutdownDelay, "The delay before running shutdown handlers")
|
||||
f.DurationVar(&shutdownOptions.timeout, "shutdown.timeout", defaultShutdownTimeout, "Timeout for shutdown handlers")
|
||||
f.BoolVar(&operatorOptions.scalingIntegrationEnabled, "internal.scaling-integration", false, "Enable Scaling Integration")
|
||||
|
@ -267,6 +271,9 @@ func executeMain(cmd *cobra.Command, args []string) {
|
|||
globals.GetGlobalTimeouts().Reconciliation().Set(operatorTimeouts.reconciliation)
|
||||
globals.GetGlobalTimeouts().ShardRebuild().Set(operatorTimeouts.shardRebuild)
|
||||
globals.GetGlobalTimeouts().ShardRebuildRetry().Set(operatorTimeouts.shardRebuildRetry)
|
||||
globals.GetGlobalTimeouts().BackupArangoClientTimeout().Set(operatorTimeouts.backupArangoD)
|
||||
globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().Set(operatorTimeouts.backupUploadArangoD)
|
||||
|
||||
globals.GetGlobals().Kubernetes().RequestBatchSize().Set(operatorKubernetesOptions.maxBatchSize)
|
||||
globals.GetGlobals().Backup().ConcurrentUploads().Set(operatorBackup.concurrentUploads)
|
||||
|
||||
|
|
|
@ -63,7 +63,7 @@ func newArangoClientBackupFactory(handler *handler) ArangoClientFactory {
|
|||
}
|
||||
|
||||
func (ac *arangoClientBackupImpl) List() (map[driver.BackupID]driver.BackupMeta, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
|
||||
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
|
||||
defer cancel()
|
||||
|
||||
backups, err := ac.driver.Backup().List(ctx, nil)
|
||||
|
@ -75,7 +75,7 @@ func (ac *arangoClientBackupImpl) List() (map[driver.BackupID]driver.BackupMeta,
|
|||
}
|
||||
|
||||
func (ac *arangoClientBackupImpl) Create() (ArangoBackupCreateResponse, error) {
|
||||
dt := defaultArangoClientTimeout
|
||||
dt := globals.GetGlobalTimeouts().BackupArangoClientTimeout().Get()
|
||||
|
||||
co := driver.BackupCreateOptions{}
|
||||
|
||||
|
@ -110,7 +110,7 @@ func (ac *arangoClientBackupImpl) Create() (ArangoBackupCreateResponse, error) {
|
|||
}
|
||||
|
||||
func (ac *arangoClientBackupImpl) Get(backupID driver.BackupID) (driver.BackupMeta, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
|
||||
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
|
||||
defer cancel()
|
||||
|
||||
// list, err := ac.driver.Backup().List(ctx, &driver.BackupListOptions{ID: backupID})
|
||||
|
@ -148,7 +148,7 @@ func (ac *arangoClientBackupImpl) getCredentialsFromSecret(ctx context.Context,
|
|||
}
|
||||
|
||||
func (ac *arangoClientBackupImpl) Upload(backupID driver.BackupID) (driver.BackupTransferJobID, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
|
||||
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().WithTimeout(context.Background())
|
||||
defer cancel()
|
||||
|
||||
uploadSpec := ac.backup.Spec.Upload
|
||||
|
@ -165,7 +165,7 @@ func (ac *arangoClientBackupImpl) Upload(backupID driver.BackupID) (driver.Backu
|
|||
}
|
||||
|
||||
func (ac *arangoClientBackupImpl) Download(backupID driver.BackupID) (driver.BackupTransferJobID, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
|
||||
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().WithTimeout(context.Background())
|
||||
defer cancel()
|
||||
|
||||
downloadSpec := ac.backup.Spec.Download
|
||||
|
@ -182,7 +182,7 @@ func (ac *arangoClientBackupImpl) Download(backupID driver.BackupID) (driver.Bac
|
|||
}
|
||||
|
||||
func (ac *arangoClientBackupImpl) Progress(jobID driver.BackupTransferJobID) (ArangoBackupProgress, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
|
||||
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
|
||||
defer cancel()
|
||||
|
||||
report, err := ac.driver.Backup().Progress(ctx, jobID)
|
||||
|
@ -243,14 +243,14 @@ func (ac *arangoClientBackupImpl) Exists(backupID driver.BackupID) (bool, error)
|
|||
}
|
||||
|
||||
func (ac *arangoClientBackupImpl) Delete(backupID driver.BackupID) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
|
||||
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
|
||||
defer cancel()
|
||||
|
||||
return ac.driver.Backup().Delete(ctx, backupID)
|
||||
}
|
||||
|
||||
func (ac *arangoClientBackupImpl) Abort(jobID driver.BackupTransferJobID) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
|
||||
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
|
||||
defer cancel()
|
||||
|
||||
return ac.driver.Backup().Abort(ctx, jobID)
|
||||
|
|
|
@ -52,11 +52,9 @@ func newFakeHandler() *handler {
|
|||
k := fake.NewSimpleClientset()
|
||||
|
||||
return &handler{
|
||||
client: f,
|
||||
kubeClient: k,
|
||||
|
||||
arangoClientTimeout: defaultArangoClientTimeout,
|
||||
eventRecorder: newEventInstance(event.NewEventRecorder("mock", k)),
|
||||
client: f,
|
||||
kubeClient: k,
|
||||
eventRecorder: newEventInstance(event.NewEventRecorder("mock", k)),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -50,9 +50,8 @@ import (
|
|||
var logger = logging.Global().RegisterAndGetLogger("backup-operator", logging.Info)
|
||||
|
||||
const (
|
||||
defaultArangoClientTimeout = 30 * time.Second
|
||||
retryCount = 25
|
||||
retryDelay = time.Second
|
||||
retryCount = 25
|
||||
retryDelay = time.Second
|
||||
|
||||
// StateChange is the name of the event sent when the state changes
|
||||
StateChange = "StateChange"
|
||||
|
@ -71,7 +70,6 @@ type handler struct {
|
|||
eventRecorder event.RecorderInstance
|
||||
|
||||
arangoClientFactory ArangoClientFactory
|
||||
arangoClientTimeout time.Duration
|
||||
|
||||
operator operator.Operator
|
||||
}
|
||||
|
|
|
@ -53,8 +53,6 @@ func RegisterInformer(operator operator.Operator, recorder event.Recorder, clien
|
|||
eventRecorder: newEventInstance(recorder),
|
||||
|
||||
operator: operator,
|
||||
|
||||
arangoClientTimeout: defaultArangoClientTimeout,
|
||||
}
|
||||
h.arangoClientFactory = newArangoClientBackupFactory(h)
|
||||
|
||||
|
|
|
@ -29,6 +29,9 @@ const (
|
|||
DefaultArangoDCheckTimeout = time.Second * 2
|
||||
DefaultReconciliationTimeout = time.Minute
|
||||
|
||||
BackupDefaultArangoClientTimeout = 30 * time.Second
|
||||
BackupUploadArangoClientTimeout = 300 * time.Second
|
||||
|
||||
// DefaultOutSyncedShardRebuildTimeout
|
||||
// timeout after which a particular out-synced shard is considered failed and a rebuild is triggered
|
||||
DefaultOutSyncedShardRebuildTimeout = time.Minute * 60
|
||||
|
@ -42,13 +45,15 @@ const (
|
|||
|
||||
var globalObj = &globals{
|
||||
timeouts: &globalTimeouts{
|
||||
requests: NewTimeout(DefaultKubernetesTimeout),
|
||||
arangod: NewTimeout(DefaultArangoDTimeout),
|
||||
arangodCheck: NewTimeout(DefaultArangoDCheckTimeout),
|
||||
reconciliation: NewTimeout(DefaultReconciliationTimeout),
|
||||
agency: NewTimeout(DefaultArangoDAgencyTimeout),
|
||||
shardRebuild: NewTimeout(DefaultOutSyncedShardRebuildTimeout),
|
||||
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
|
||||
requests: NewTimeout(DefaultKubernetesTimeout),
|
||||
arangod: NewTimeout(DefaultArangoDTimeout),
|
||||
arangodCheck: NewTimeout(DefaultArangoDCheckTimeout),
|
||||
reconciliation: NewTimeout(DefaultReconciliationTimeout),
|
||||
agency: NewTimeout(DefaultArangoDAgencyTimeout),
|
||||
shardRebuild: NewTimeout(DefaultOutSyncedShardRebuildTimeout),
|
||||
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
|
||||
backupArangoClientTimeout: NewTimeout(BackupDefaultArangoClientTimeout),
|
||||
backupArangoClientUploadTimeout: NewTimeout(BackupUploadArangoClientTimeout),
|
||||
},
|
||||
kubernetes: &globalKubernetes{
|
||||
requestBatchSize: NewInt64(DefaultKubernetesRequestBatchSize),
|
||||
|
@ -123,10 +128,15 @@ type GlobalTimeouts interface {
|
|||
ArangoD() Timeout
|
||||
ArangoDCheck() Timeout
|
||||
Agency() Timeout
|
||||
|
||||
BackupArangoClientTimeout() Timeout
|
||||
BackupArangoClientUploadTimeout() Timeout
|
||||
}
|
||||
|
||||
// globalTimeouts groups the Timeout values exposed through its accessor
// methods; every field is of type Timeout.
type globalTimeouts struct {
|
||||
requests, arangod, reconciliation, arangodCheck, agency, shardRebuild, shardRebuildRetry Timeout
|
||||
// backupArangoClientTimeout is returned by BackupArangoClientTimeout().
backupArangoClientTimeout Timeout
|
||||
// backupArangoClientUploadTimeout is returned by BackupArangoClientUploadTimeout().
backupArangoClientUploadTimeout Timeout
|
||||
}
|
||||
|
||||
func (g *globalTimeouts) Agency() Timeout {
|
||||
|
@ -156,3 +166,11 @@ func (g *globalTimeouts) ArangoD() Timeout {
|
|||
// Kubernetes returns the Timeout held in the requests field.
func (g *globalTimeouts) Kubernetes() Timeout {
|
||||
return g.requests
|
||||
}
|
||||
|
||||
// BackupArangoClientTimeout returns the Timeout held in the
// backupArangoClientTimeout field.
func (g *globalTimeouts) BackupArangoClientTimeout() Timeout {
|
||||
return g.backupArangoClientTimeout
|
||||
}
|
||||
|
||||
// BackupArangoClientUploadTimeout returns the Timeout held in the
// backupArangoClientUploadTimeout field.
func (g *globalTimeouts) BackupArangoClientUploadTimeout() Timeout {
|
||||
return g.backupArangoClientUploadTimeout
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue