1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Feature] Async restore (#982)

This commit is contained in:
Adam Janikowski 2022-05-09 12:47:20 +03:00 committed by GitHub
parent 3191f5ef43
commit b209095115
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 169 additions and 15 deletions

View file

@ -8,6 +8,7 @@
- (Maintenance) Add check make targets
- (Feature) Create support for local variables in actions.
- (Feature) Support for asynchronous ArangoD requests.
- (Feature) Change Restore in Cluster mode to Async Request
## [1.2.11](https://github.com/arangodb/kube-arangodb/tree/1.2.11) (2022-04-30)
- (Bugfix) Orphan PVC are not removed

View file

@ -36,6 +36,8 @@ import (
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
)
// ConnectionWrap takes a driver.Connection and returns the driver.Connection
// that should be used in its place.
type ConnectionWrap func(c driver.Connection) driver.Connection
type Cache interface {
GetAuth() conn.Auth
@ -43,6 +45,7 @@ type Cache interface {
Get(ctx context.Context, group api.ServerGroup, id string) (driver.Client, error)
GetDatabase(ctx context.Context) (driver.Client, error)
GetDatabaseWithWrap(ctx context.Context, wraps ...ConnectionWrap) (driver.Client, error)
GetAgency(ctx context.Context) (agency.Agency, error)
}
@ -144,6 +147,25 @@ func (cc *cache) GetDatabase(ctx context.Context) (driver.Client, error) {
}
}
// GetDatabaseWithWrap returns a new client for the database. Its connection is
// the database client's connection passed through every non-nil wrap, in the
// order the wraps were given. Nil wraps are skipped.
func (cc *cache) GetDatabaseWithWrap(ctx context.Context, wraps ...ConnectionWrap) (driver.Client, error) {
	base, err := cc.getDatabaseClient()
	if err != nil {
		return nil, err
	}

	wrapped := base.Connection()
	for _, wrap := range wraps {
		if wrap == nil {
			continue
		}
		wrapped = wrap(wrapped)
	}

	cfg := driver.ClientConfig{Connection: wrapped}
	return driver.NewClient(cfg)
}
// GetAgency returns a cached client for the agency
func (cc *cache) GetAgency(ctx context.Context) (agency.Agency, error) {
cc.mutex.Lock()

View file

@ -187,6 +187,14 @@ func (d *Deployment) GetDatabaseClient(ctx context.Context) (driver.Client, erro
return c, nil
}
// GetDatabaseAsyncClient returns a database client obtained from the client
// cache with its connection wrapped by conn.NewAsyncConnection. Errors from
// the cache are returned with a stack attached.
func (d *Deployment) GetDatabaseAsyncClient(ctx context.Context) (driver.Client, error) {
	client, err := d.clientCache.GetDatabaseWithWrap(ctx, conn.NewAsyncConnection)
	if err == nil {
		return client, nil
	}

	return nil, errors.WithStack(err)
}
// GetServerClient returns a cached client for a specific server.
func (d *Deployment) GetServerClient(ctx context.Context, group api.ServerGroup, id string) (driver.Client, error) {
c, err := d.clientCache.Get(ctx, group, id)

View file

@ -28,13 +28,21 @@ import (
"github.com/arangodb/go-driver"
"github.com/rs/zerolog"
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/util/arangod/conn"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
)
// init registers newBackupRestoreAction for api.ActionTypeBackupRestore,
// passing backupRestoreTimeout to registerAction alongside it.
func init() {
registerAction(api.ActionTypeBackupRestore, newBackupRestoreAction, backupRestoreTimeout)
}
// Keys of the plan-local values used by the backup restore action: they are
// written with actionCtx.Add once the async restore request has been submitted
// and read back with actionCtx.Get in CheckProgress.
const (
// actionBackupRestoreLocalJobID is the key under which the async job ID is stored.
actionBackupRestoreLocalJobID api.PlanLocalKey = "jobID"
// actionBackupRestoreLocalBackupName is the key under which the name of the
// backup being restored is stored.
actionBackupRestoreLocalBackupName api.PlanLocalKey = "backupName"
)
func newBackupRestoreAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action {
a := &actionBackupRestore{}
@ -47,8 +55,6 @@ func newBackupRestoreAction(log zerolog.Logger, action api.Action, actionCtx Act
type actionBackupRestore struct {
// actionImpl implement timeout and member id functions
actionImpl
actionEmptyCheckProgress
}
func (a actionBackupRestore) Start(ctx context.Context) (bool, error) {
@ -64,13 +70,6 @@ func (a actionBackupRestore) Start(ctx context.Context) (bool, error) {
return true, nil
}
ctxChild, cancel := globals.GetGlobalTimeouts().ArangoD().WithTimeout(ctx)
defer cancel()
dbc, err := a.actionCtx.GetDatabaseClient(ctxChild)
if err != nil {
return false, err
}
backupResource, err := a.actionCtx.GetBackup(ctx, *spec.RestoreFrom)
if err != nil {
a.log.Error().Err(err).Msg("Unable to find backup")
@ -96,15 +95,61 @@ func (a actionBackupRestore) Start(ctx context.Context) (bool, error) {
return false, err
}
switch mode := a.actionCtx.GetSpec().Mode.Get(); mode {
case api.DeploymentModeActiveFailover, api.DeploymentModeSingle:
return a.restoreSync(ctx, backupResource)
case api.DeploymentModeCluster:
return a.restoreAsync(ctx, backupResource)
default:
return false, errors.Newf("Unknown mode %s", mode)
}
}
// restoreAsync submits the restore of the given backup through the async
// database client.
//
// The restore call is expected to fail with an "async job in progress" error
// carrying the job ID. In that case the job ID and the backup name are stored
// as action locals and (false, nil) is returned, so the action is not yet
// reported as done. Any other error is wrapped and returned. A nil error from
// the restore call is treated as a failure, because no async response was
// received.
func (a actionBackupRestore) restoreAsync(ctx context.Context, backup *backupApi.ArangoBackup) (bool, error) {
	clientCtx, clientCancel := globals.GetGlobalTimeouts().ArangoD().WithTimeout(ctx)
	defer clientCancel()

	client, err := a.actionCtx.GetDatabaseAsyncClient(clientCtx)
	if err != nil {
		return false, errors.Wrapf(err, "Unable to create client")
	}

	// The request itself gets its own, fresh timeout.
	requestCtx, requestCancel := globals.GetGlobalTimeouts().ArangoD().WithTimeout(ctx)
	defer requestCancel()

	restoreErr := client.Backup().Restore(requestCtx, driver.BackupID(backup.Status.Backup.ID), nil)
	if restoreErr == nil {
		return false, errors.Newf("Async response not received")
	}

	jobID, inProgress := conn.IsAsyncJobInProgress(restoreErr)
	if !inProgress {
		return false, errors.Wrapf(restoreErr, "Unknown restore error")
	}

	// The async request has been sent; remember what is needed to poll for it.
	a.actionCtx.Add(actionBackupRestoreLocalJobID, jobID, true)
	a.actionCtx.Add(actionBackupRestoreLocalBackupName, backup.GetName(), true)

	return false, nil
}
func (a actionBackupRestore) restoreSync(ctx context.Context, backup *backupApi.ArangoBackup) (bool, error) {
ctxChild, cancel := globals.GetGlobalTimeouts().ArangoD().WithTimeout(ctx)
defer cancel()
dbc, err := a.actionCtx.GetDatabaseClient(ctxChild)
if err != nil {
a.log.Debug().Err(err).Msg("Failed to create database client")
return false, nil
}
// The below action can take a while so the full parent timeout context is used.
restoreError := dbc.Backup().Restore(ctx, driver.BackupID(backupResource.Status.Backup.ID), nil)
restoreError := dbc.Backup().Restore(ctx, driver.BackupID(backup.Status.Backup.ID), nil)
if restoreError != nil {
a.log.Error().Err(restoreError).Msg("Restore failed")
}
if err := a.actionCtx.WithStatusUpdate(ctx, func(s *api.DeploymentStatus) bool {
result := &api.DeploymentRestoreResult{
RequestedFrom: spec.GetRestoreFrom(),
RequestedFrom: backup.GetName(),
}
if restoreError != nil {
@ -118,9 +163,70 @@ func (a actionBackupRestore) Start(ctx context.Context) (bool, error) {
return true
}); err != nil {
a.log.Error().Err(err).Msg("Unable to ser restored state")
a.log.Error().Err(err).Msg("Unable to set restored state")
return false, err
}
return true, nil
}
// CheckProgress polls the async restore job whose ID was stored in the action
// locals and, once the job has finished, records the outcome in the
// deployment status.
//
// It returns (false, false, nil) while the job is still in progress, when the
// database client cannot be created, or when the fetch fails with a temporary
// error, so the check is retried. It returns an error when a required local
// key is missing or when the status update fails. After the restore result
// (success or failure) has been written to the status it returns
// (true, false, nil).
func (a actionBackupRestore) CheckProgress(ctx context.Context) (bool, bool, error) {
	backupName, found := a.actionCtx.Get(a.action, actionBackupRestoreLocalBackupName)
	if !found {
		return false, false, errors.Newf("Local Key is missing in action: %s", actionBackupRestoreLocalBackupName)
	}

	jobID, found := a.actionCtx.Get(a.action, actionBackupRestoreLocalJobID)
	if !found {
		return false, false, errors.Newf("Local Key is missing in action: %s", actionBackupRestoreLocalJobID)
	}

	clientCtx, clientCancel := globals.GetGlobalTimeouts().ArangoD().WithTimeout(ctx)
	defer clientCancel()

	client, err := a.actionCtx.GetDatabaseAsyncClient(clientCtx)
	if err != nil {
		a.log.Debug().Err(err).Msg("Failed to create database client")
		return false, false, nil
	}

	fetchCtx, fetchCancel := globals.GetGlobalTimeouts().ArangoD().WithTimeout(ctx)
	defer fetchCancel()

	// The backup ID and options are placeholders: parameters do not matter
	// when fetching the result of an async job.
	restoreErr := client.Backup().Restore(conn.WithAsyncID(fetchCtx, jobID), "", nil)
	if restoreErr != nil {
		// Either the job is still running or the failure is temporary; retry later.
		if _, inProgress := conn.IsAsyncJobInProgress(restoreErr); inProgress || errors.IsTemporary(restoreErr) {
			return false, false, nil
		}
	}

	// The restore has finished; persist its outcome.
	updateErr := a.actionCtx.WithStatusUpdate(ctx, func(s *api.DeploymentStatus) bool {
		if restoreErr == nil {
			s.Restore = &api.DeploymentRestoreResult{
				RequestedFrom: backupName,
				State:         api.DeploymentRestoreStateRestored,
			}
			return true
		}

		s.Restore = &api.DeploymentRestoreResult{
			RequestedFrom: backupName,
			State:         api.DeploymentRestoreStateRestoreFailed,
			Message:       restoreErr.Error(),
		}
		return true
	})
	if updateErr != nil {
		a.log.Error().Err(updateErr).Msg("Unable to set restored state")
		return false, false, updateErr
	}

	return true, false, nil
}

View file

@ -155,6 +155,10 @@ type actionContext struct {
locals api.PlanLocals
}
// GetDatabaseAsyncClient delegates to GetDatabaseAsyncClient of the wrapped context.
func (ac *actionContext) GetDatabaseAsyncClient(ctx context.Context) (driver.Client, error) {
return ac.context.GetDatabaseAsyncClient(ctx)
}
func (ac *actionContext) CurrentLocals() api.PlanLocals {
return ac.locals
}

View file

@ -85,6 +85,11 @@ type testContext struct {
Inspector inspectorInterface.Inspector
}
// GetDatabaseAsyncClient is not implemented for testContext; calling it panics.
func (c *testContext) GetDatabaseAsyncClient(ctx context.Context) (driver.Client, error) {
// TODO: implement me
panic("implement me")
}
func (c *testContext) WithArangoMember(cache inspectorInterface.Inspector, timeout time.Duration, name string) reconciler.ArangoMemberModContext {
return reconciler.NewArangoMemberModContext(cache, timeout, name)
}

View file

@ -165,6 +165,10 @@ type DeploymentDatabaseClient interface {
// GetDatabaseClient returns a cached client for the entire database (cluster coordinators or single server),
// creating one if needed.
GetDatabaseClient(ctx context.Context) (driver.Client, error)
// GetDatabaseAsyncClient returns a client for the entire database (cluster coordinators or single server)
// whose connection is wrapped so that requests are executed in async mode.
GetDatabaseAsyncClient(ctx context.Context) (driver.Client, error)
}
type DeploymentMemberClient interface {

View file

@ -20,7 +20,11 @@
package conn
import "fmt"
import (
"fmt"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
)
func IsAsyncErrorNotFound(err error) bool {
if err == nil {
@ -31,7 +35,7 @@ func IsAsyncErrorNotFound(err error) bool {
return true
}
return false
return IsAsyncErrorNotFound(errors.Cause(err))
}
func newAsyncErrorNotFound(id string) error {
@ -57,7 +61,7 @@ func IsAsyncJobInProgress(err error) (string, bool) {
return v.jobID, true
}
return "", false
return IsAsyncJobInProgress(errors.Cause(err))
}
func newAsyncJobInProgress(id string) error {