1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

GT-313 Fix for ContextExceeded error during backup upload (#1263)

This commit is contained in:
jwierzbo 2023-07-05 15:58:33 +02:00 committed by GitHub
parent 0cd5a2a65f
commit a3206f0c58
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 53 additions and 33 deletions

View file

@ -7,6 +7,7 @@
- (Bugfix) Fix agency timeout
- (Improvement) Extract Agency Timeout
- (Feature) Rebalancer V2
- (Bugfix) Fix for ContextExceeded error during backup upload
## [1.2.30](https://github.com/arangodb/kube-arangodb/tree/1.2.30) (2023-06-16)
- (Feature) AgencyCache Interface

View file

@ -155,13 +155,15 @@ var (
concurrentUploads int
}
operatorTimeouts struct {
k8s time.Duration
arangoD time.Duration
arangoDCheck time.Duration
reconciliation time.Duration
agency time.Duration
shardRebuild time.Duration
shardRebuildRetry time.Duration
k8s time.Duration
arangoD time.Duration
arangoDCheck time.Duration
reconciliation time.Duration
agency time.Duration
shardRebuild time.Duration
shardRebuildRetry time.Duration
backupArangoD time.Duration
backupUploadArangoD time.Duration
}
chaosOptions struct {
allowed bool
@ -216,6 +218,8 @@ func init() {
f.DurationVar(&operatorTimeouts.reconciliation, "timeout.reconciliation", globals.DefaultReconciliationTimeout, "The reconciliation timeout to the ArangoDB CR")
f.DurationVar(&operatorTimeouts.shardRebuild, "timeout.shard-rebuild", globals.DefaultOutSyncedShardRebuildTimeout, "Timeout after which particular out-synced shard is considered as failed and rebuild is triggered")
f.DurationVar(&operatorTimeouts.shardRebuildRetry, "timeout.shard-rebuild-retry", globals.DefaultOutSyncedShardRebuildRetryTimeout, "Timeout after which rebuild shards retry flow is triggered")
f.DurationVar(&operatorTimeouts.backupArangoD, "timeout.backup-arangod", globals.BackupDefaultArangoClientTimeout, "The request timeout to the ArangoDB during backup calls")
f.DurationVar(&operatorTimeouts.backupUploadArangoD, "timeout.backup-upload", globals.BackupUploadArangoClientTimeout, "The request timeout to the ArangoDB during uploading files")
f.DurationVar(&shutdownOptions.delay, "shutdown.delay", defaultShutdownDelay, "The delay before running shutdown handlers")
f.DurationVar(&shutdownOptions.timeout, "shutdown.timeout", defaultShutdownTimeout, "Timeout for shutdown handlers")
f.BoolVar(&operatorOptions.scalingIntegrationEnabled, "internal.scaling-integration", false, "Enable Scaling Integration")
@ -267,6 +271,9 @@ func executeMain(cmd *cobra.Command, args []string) {
globals.GetGlobalTimeouts().Reconciliation().Set(operatorTimeouts.reconciliation)
globals.GetGlobalTimeouts().ShardRebuild().Set(operatorTimeouts.shardRebuild)
globals.GetGlobalTimeouts().ShardRebuildRetry().Set(operatorTimeouts.shardRebuildRetry)
globals.GetGlobalTimeouts().BackupArangoClientTimeout().Set(operatorTimeouts.backupArangoD)
globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().Set(operatorTimeouts.backupUploadArangoD)
globals.GetGlobals().Kubernetes().RequestBatchSize().Set(operatorKubernetesOptions.maxBatchSize)
globals.GetGlobals().Backup().ConcurrentUploads().Set(operatorBackup.concurrentUploads)

View file

@ -63,7 +63,7 @@ func newArangoClientBackupFactory(handler *handler) ArangoClientFactory {
}
func (ac *arangoClientBackupImpl) List() (map[driver.BackupID]driver.BackupMeta, error) {
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
defer cancel()
backups, err := ac.driver.Backup().List(ctx, nil)
@ -75,7 +75,7 @@ func (ac *arangoClientBackupImpl) List() (map[driver.BackupID]driver.BackupMeta,
}
func (ac *arangoClientBackupImpl) Create() (ArangoBackupCreateResponse, error) {
dt := defaultArangoClientTimeout
dt := globals.GetGlobalTimeouts().BackupArangoClientTimeout().Get()
co := driver.BackupCreateOptions{}
@ -110,7 +110,7 @@ func (ac *arangoClientBackupImpl) Create() (ArangoBackupCreateResponse, error) {
}
func (ac *arangoClientBackupImpl) Get(backupID driver.BackupID) (driver.BackupMeta, error) {
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
defer cancel()
// list, err := ac.driver.Backup().List(ctx, &driver.BackupListOptions{ID: backupID})
@ -148,7 +148,7 @@ func (ac *arangoClientBackupImpl) getCredentialsFromSecret(ctx context.Context,
}
func (ac *arangoClientBackupImpl) Upload(backupID driver.BackupID) (driver.BackupTransferJobID, error) {
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().WithTimeout(context.Background())
defer cancel()
uploadSpec := ac.backup.Spec.Upload
@ -165,7 +165,7 @@ func (ac *arangoClientBackupImpl) Upload(backupID driver.BackupID) (driver.Backu
}
func (ac *arangoClientBackupImpl) Download(backupID driver.BackupID) (driver.BackupTransferJobID, error) {
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().WithTimeout(context.Background())
defer cancel()
downloadSpec := ac.backup.Spec.Download
@ -182,7 +182,7 @@ func (ac *arangoClientBackupImpl) Download(backupID driver.BackupID) (driver.Bac
}
func (ac *arangoClientBackupImpl) Progress(jobID driver.BackupTransferJobID) (ArangoBackupProgress, error) {
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
defer cancel()
report, err := ac.driver.Backup().Progress(ctx, jobID)
@ -243,14 +243,14 @@ func (ac *arangoClientBackupImpl) Exists(backupID driver.BackupID) (bool, error)
}
func (ac *arangoClientBackupImpl) Delete(backupID driver.BackupID) error {
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
defer cancel()
return ac.driver.Backup().Delete(ctx, backupID)
}
func (ac *arangoClientBackupImpl) Abort(jobID driver.BackupTransferJobID) error {
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
defer cancel()
return ac.driver.Backup().Abort(ctx, jobID)

View file

@ -52,11 +52,9 @@ func newFakeHandler() *handler {
k := fake.NewSimpleClientset()
return &handler{
client: f,
kubeClient: k,
arangoClientTimeout: defaultArangoClientTimeout,
eventRecorder: newEventInstance(event.NewEventRecorder("mock", k)),
client: f,
kubeClient: k,
eventRecorder: newEventInstance(event.NewEventRecorder("mock", k)),
}
}

View file

@ -50,9 +50,8 @@ import (
var logger = logging.Global().RegisterAndGetLogger("backup-operator", logging.Info)
const (
defaultArangoClientTimeout = 30 * time.Second
retryCount = 25
retryDelay = time.Second
retryCount = 25
retryDelay = time.Second
// StateChange is the name of the event sent when the state changes
StateChange = "StateChange"
@ -71,7 +70,6 @@ type handler struct {
eventRecorder event.RecorderInstance
arangoClientFactory ArangoClientFactory
arangoClientTimeout time.Duration
operator operator.Operator
}

View file

@ -53,8 +53,6 @@ func RegisterInformer(operator operator.Operator, recorder event.Recorder, clien
eventRecorder: newEventInstance(recorder),
operator: operator,
arangoClientTimeout: defaultArangoClientTimeout,
}
h.arangoClientFactory = newArangoClientBackupFactory(h)

View file

@ -29,6 +29,9 @@ const (
DefaultArangoDCheckTimeout = time.Second * 2
DefaultReconciliationTimeout = time.Minute
BackupDefaultArangoClientTimeout = 30 * time.Second
BackupUploadArangoClientTimeout = 300 * time.Second
// DefaultOutSyncedShardRebuildTimeout is the timeout after which a particular
// out-synced shard is considered failed and a rebuild is triggered
DefaultOutSyncedShardRebuildTimeout = time.Minute * 60
@ -42,13 +45,15 @@ const (
var globalObj = &globals{
timeouts: &globalTimeouts{
requests: NewTimeout(DefaultKubernetesTimeout),
arangod: NewTimeout(DefaultArangoDTimeout),
arangodCheck: NewTimeout(DefaultArangoDCheckTimeout),
reconciliation: NewTimeout(DefaultReconciliationTimeout),
agency: NewTimeout(DefaultArangoDAgencyTimeout),
shardRebuild: NewTimeout(DefaultOutSyncedShardRebuildTimeout),
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
requests: NewTimeout(DefaultKubernetesTimeout),
arangod: NewTimeout(DefaultArangoDTimeout),
arangodCheck: NewTimeout(DefaultArangoDCheckTimeout),
reconciliation: NewTimeout(DefaultReconciliationTimeout),
agency: NewTimeout(DefaultArangoDAgencyTimeout),
shardRebuild: NewTimeout(DefaultOutSyncedShardRebuildTimeout),
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
backupArangoClientTimeout: NewTimeout(BackupDefaultArangoClientTimeout),
backupArangoClientUploadTimeout: NewTimeout(BackupUploadArangoClientTimeout),
},
kubernetes: &globalKubernetes{
requestBatchSize: NewInt64(DefaultKubernetesRequestBatchSize),
@ -123,10 +128,15 @@ type GlobalTimeouts interface {
ArangoD() Timeout
ArangoDCheck() Timeout
Agency() Timeout
BackupArangoClientTimeout() Timeout
BackupArangoClientUploadTimeout() Timeout
}
// globalTimeouts is the concrete holder of every operator-wide Timeout value.
// Each field is populated in globalObj from the matching Default*/Backup*
// constant and handed out through the accessor methods on this type.
type globalTimeouts struct {
	// requests is returned by Kubernetes().
	requests Timeout
	// arangod is seeded from DefaultArangoDTimeout.
	arangod Timeout
	// reconciliation is seeded from DefaultReconciliationTimeout.
	reconciliation Timeout
	// arangodCheck is seeded from DefaultArangoDCheckTimeout.
	arangodCheck Timeout
	// agency is seeded from DefaultArangoDAgencyTimeout.
	agency Timeout
	// shardRebuild is seeded from DefaultOutSyncedShardRebuildTimeout.
	shardRebuild Timeout
	// shardRebuildRetry is seeded from DefaultOutSyncedShardRebuildRetryTimeout.
	shardRebuildRetry Timeout

	// backupArangoClientTimeout is returned by BackupArangoClientTimeout().
	backupArangoClientTimeout Timeout
	// backupArangoClientUploadTimeout is returned by BackupArangoClientUploadTimeout().
	backupArangoClientUploadTimeout Timeout
}
func (g *globalTimeouts) Agency() Timeout {
@ -156,3 +166,11 @@ func (g *globalTimeouts) ArangoD() Timeout {
// Kubernetes exposes the Timeout stored in the requests field, which
// globalObj initialises from DefaultKubernetesTimeout.
func (g *globalTimeouts) Kubernetes() Timeout {
	return g.requests
}
// BackupArangoClientTimeout exposes the Timeout used for ordinary backup
// requests to ArangoDB. globalObj initialises it from
// BackupDefaultArangoClientTimeout, and the operator overrides it with the
// "timeout.backup-arangod" flag value.
func (g *globalTimeouts) BackupArangoClientTimeout() Timeout {
	return g.backupArangoClientTimeout
}
// BackupArangoClientUploadTimeout exposes the Timeout used for backup
// transfer requests; the backup client's Upload and Download both derive
// their contexts from it. globalObj initialises it from
// BackupUploadArangoClientTimeout, and the operator overrides it with the
// "timeout.backup-upload" flag value.
func (g *globalTimeouts) BackupArangoClientUploadTimeout() Timeout {
	return g.backupArangoClientUploadTimeout
}