From 27d448a4e912ad161f45e0386a3011d677424262 Mon Sep 17 00:00:00 2001 From: jwierzbo Date: Wed, 8 Nov 2023 11:21:22 +0100 Subject: [PATCH] GT-523 Async backup (#1460) --- .circleci/continue_config.yml | 8 ++ CHANGELOG.md | 1 + README.md | 55 ++++---- cmd/admin.go | 2 +- docs/api/ArangoBackup.V1.md | 10 +- docs/api/ArangoBackupPolicy.V1.md | 2 +- docs/features/README.md | 63 +++++----- docs/features/rebalancer_v2.md | 4 +- go.mod | 6 +- go.sum | 10 ++ internal/features.yaml | 7 ++ internal/features_test.go | 4 +- pkg/apis/backup/v1/backup_spec.go | 2 +- pkg/apis/backup/v1/backup_state.go | 4 +- pkg/deployment/features/backup_async.go | 38 ++++++ pkg/deployment/images.go | 2 +- .../reconcile/action_backup_restore.go | 2 +- pkg/handlers/backup/arango_client.go | 7 +- pkg/handlers/backup/arango_client_impl.go | 43 ++++++- .../backup/arango_client_mock_test.go | 18 +++ pkg/handlers/backup/backup_suite_test.go | 4 +- pkg/handlers/backup/handler.go | 12 +- pkg/handlers/backup/state.go | 1 + pkg/handlers/backup/state_create.go | 42 ++++++- pkg/handlers/backup/state_create_test.go | 59 ++++++++- pkg/handlers/backup/state_creating.go | 83 ++++++++++++ pkg/handlers/backup/state_creating_test.go | 119 ++++++++++++++++++ pkg/util/arangod/client.go | 24 ++-- 28 files changed, 533 insertions(+), 99 deletions(-) create mode 100644 pkg/deployment/features/backup_async.go create mode 100644 pkg/handlers/backup/state_creating.go create mode 100644 pkg/handlers/backup/state_creating_test.go diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index d2a6c4ff4..32b250841 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -74,6 +74,14 @@ jobs: exit 0 fi make vulncheck + - run: + name: verify-generated docs + command: | + if [ -z "$CIRCLE_PULL_REQUEST" ]; then + echo "This is not a pull request. Skipping..." 
+ exit 0 + fi + make verify-generated environment: GO111MODULES: off diff --git a/CHANGELOG.md b/CHANGELOG.md index b20b0ea36..bbfbc8701 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A) - (Documentation) Improvements and fixes for rendered documentation (GH pages) - (Feature) License Manager +- (Improvement) Use Async mode for backup creation ## [1.2.35](https://github.com/arangodb/kube-arangodb/tree/1.2.35) (2023-11-06) - (Maintenance) Update go-driver to v1.6.0, update IsNotFound() checks diff --git a/README.md b/README.md index b4974c3fe..f747f3852 100644 --- a/README.md +++ b/README.md @@ -61,33 +61,34 @@ covers individual newer features separately. [START_INJECT]: # (featuresCommunityTable) -| Feature | Operator Version | Introduced | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks | -|:------------------------------------------------------------------------------|:-----------------|:-----------|:-----------------|:----------------------|:-------------|:--------|:------------------------------------------------------|:-----------------------------------------------------------------------------------| -| Enforced ResignLeadership | 1.2.34 | 1.2.34 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.enforced-resign-leadership | Enforce ResignLeadership and ensure that Leaders are moved from restarted DBServer | -| Copy resources spec to init containers | 1.2.33 | 1.2.33 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.init-containers-copy-resources | Copy resources spec to built-in init containers if they are not specified | -| [Rebalancer V2](docs/features/rebalancer_v2.md) | 1.2.31 | 1.2.31 | >= 3.10.0 | Community, Enterprise | Alpha | False | --deployment.feature.rebalancer-v2 | N/A | -| [Secured containers](docs/features/secured_containers.md) | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | 
Alpha | False | --deployment.feature.secured-containers | If set to True Operator will run containers in secure mode | -| Version Check V2 | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.upgrade-version-check-V2 | N/A | -| [Operator Ephemeral Volumes](docs/features/ephemeral_volumes.md) | 1.2.31 | 1.2.2 | >= 3.8.0 | Community, Enterprise | Beta | False | --deployment.feature.ephemeral-volumes | N/A | -| [Force Rebuild Out Synced Shards](docs/features/rebuild_out_synced_shards.md) | 1.2.27 | 1.2.27 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.force-rebuild-out-synced-shards | It should be used only if user is aware of the risks. | -| [Spec Default Restore](docs/features/deployment_spec_defaults.md) | 1.2.25 | 1.2.21 | >= 3.8.0 | Community, Enterprise | Beta | True | --deployment.feature.deployment-spec-defaults-restore | If set to False Operator will not change ArangoDeployment Spec | -| Version Check | 1.2.23 | 1.1.4 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.upgrade-version-check | N/A | -| [Failover Leader service](docs/features/failover_leader_service.md) | 1.2.13 | 1.2.13 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.failover-leadership | N/A | -| Graceful Restart | 1.2.5 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | ---deployment.feature.graceful-shutdown | N/A | -| Optional Graceful Restart | 1.2.0 | 1.2.5 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.optional-graceful-shutdown | N/A | -| Operator Internal Metrics Exporter | 1.2.0 | 1.2.0 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.metrics-exporter | N/A | -| Operator Maintenance Management Support | 1.2.0 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.maintenance | N/A | -| Encryption Key Rotation Support | 1.2.0 | 1.0.3 | >= 3.8.0 | 
Enterprise | NotSupported | False | --deployment.feature.encryption-rotation | N/A | -| TLS Runtime Rotation Support | 1.1.0 | 1.0.4 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-rotation | N/A | -| JWT Rotation Support | 1.1.0 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.jwt-rotation | N/A | -| Operator Single Mode | 1.0.4 | 1.0.4 | >= 3.8.0 | Community, Enterprise | Production | False | --mode.single | Only 1 instance of Operator allowed in namespace when feature is enabled | -| TLS SNI Support | 1.0.3 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-sni | N/A | -| Disabling of liveness probes | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | -| Pod Disruption Budgets | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | -| Prometheus Metrics Exporter | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | Prometheus required | -| Sidecar Containers | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | -| Volume Claim Templates | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | -| Volume Resizing | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | +| Feature | Operator Version | Introduced | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks | +|:------------------------------------------------------------------------------|:-----------------|:-----------|:-----------------|:----------------------|:-------------|:--------|:------------------------------------------------------|:--------------------------------------------------------------------------------------| +| Create backups asynchronously | 1.2.35 | 1.2.35 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.async-backup-creation | Create backups asynchronously to avoid blocking 
the operator and reaching the timeout | +| Enforced ResignLeadership | 1.2.34 | 1.2.34 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.enforced-resign-leadership | Enforce ResignLeadership and ensure that Leaders are moved from restarted DBServer | +| Copy resources spec to init containers | 1.2.33 | 1.2.33 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.init-containers-copy-resources | Copy resources spec to built-in init containers if they are not specified | +| [Rebalancer V2](docs/features/rebalancer_v2.md) | 1.2.31 | 1.2.31 | >= 3.10.0 | Community, Enterprise | Alpha | False | --deployment.feature.rebalancer-v2 | N/A | +| [Secured containers](docs/features/secured_containers.md) | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.secured-containers | If set to True Operator will run containers in secure mode | +| Version Check V2 | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.upgrade-version-check-V2 | N/A | +| [Operator Ephemeral Volumes](docs/features/ephemeral_volumes.md) | 1.2.31 | 1.2.2 | >= 3.8.0 | Community, Enterprise | Beta | False | --deployment.feature.ephemeral-volumes | N/A | +| [Force Rebuild Out Synced Shards](docs/features/rebuild_out_synced_shards.md) | 1.2.27 | 1.2.27 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.force-rebuild-out-synced-shards | It should be used only if user is aware of the risks. 
| +| [Spec Default Restore](docs/features/deployment_spec_defaults.md) | 1.2.25 | 1.2.21 | >= 3.8.0 | Community, Enterprise | Beta | True | --deployment.feature.deployment-spec-defaults-restore | If set to False Operator will not change ArangoDeployment Spec | +| Version Check | 1.2.23 | 1.1.4 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.upgrade-version-check | N/A | +| [Failover Leader service](docs/features/failover_leader_service.md) | 1.2.13 | 1.2.13 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.failover-leadership | N/A | +| Graceful Restart | 1.2.5 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | ---deployment.feature.graceful-shutdown | N/A | +| Optional Graceful Restart | 1.2.0 | 1.2.5 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.optional-graceful-shutdown | N/A | +| Operator Internal Metrics Exporter | 1.2.0 | 1.2.0 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.metrics-exporter | N/A | +| Operator Maintenance Management Support | 1.2.0 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.maintenance | N/A | +| Encryption Key Rotation Support | 1.2.0 | 1.0.3 | >= 3.8.0 | Enterprise | NotSupported | False | --deployment.feature.encryption-rotation | N/A | +| TLS Runtime Rotation Support | 1.1.0 | 1.0.4 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-rotation | N/A | +| JWT Rotation Support | 1.1.0 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.jwt-rotation | N/A | +| Operator Single Mode | 1.0.4 | 1.0.4 | >= 3.8.0 | Community, Enterprise | Production | False | --mode.single | Only 1 instance of Operator allowed in namespace when feature is enabled | +| TLS SNI Support | 1.0.3 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-sni | N/A | +| Disabling of liveness probes | 0.3.11 | 0.3.10 | >= 3.8.0 | 
Community, Enterprise | Production | True | N/A | N/A | +| Pod Disruption Budgets | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | +| Prometheus Metrics Exporter | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | Prometheus required | +| Sidecar Containers | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | +| Volume Claim Templates | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | +| Volume Resizing | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | [END_INJECT]: # (featuresCommunityTable) diff --git a/cmd/admin.go b/cmd/admin.go index 84272deb1..fadecc673 100644 --- a/cmd/admin.go +++ b/cmd/admin.go @@ -284,7 +284,7 @@ func createClient(endpoints []string, certCA *x509.CertPool, auth connection.Aut conf := connection.HttpConfiguration{ Authentication: auth, ContentType: contentType, - Endpoint: connection.NewEndpoints(endpoints...), + Endpoint: connection.NewRoundRobinEndpoints(endpoints), Transport: &http.Transport{ TLSClientConfig: &tls.Config{ RootCAs: certCA, diff --git a/docs/api/ArangoBackup.V1.md b/docs/api/ArangoBackup.V1.md index 7578c2c6a..2ca9c828c 100644 --- a/docs/api/ArangoBackup.V1.md +++ b/docs/api/ArangoBackup.V1.md @@ -91,7 +91,7 @@ This field is **immutable**: can't be changed after backup creation ### .spec.options.timeout: number -Timeout for Backup creation request in seconds. +Timeout for Backup creation request in seconds. Works only when AsyncBackupCreation feature is set to false. Default Value: `30` @@ -197,13 +197,13 @@ Uploaded Determines if ArangoBackup has been uploaded Message for the state this object is in. 
-[Code Reference](https://github.com/arangodb/kube-arangodb/blob/1.2.35/pkg/apis/backup/v1/backup_state.go#L86) +[Code Reference](https://github.com/arangodb/kube-arangodb/blob/1.2.35/pkg/apis/backup/v1/backup_state.go#L88) ### .status.progress.jobID: string JobID ArangoDB job ID for uploading or downloading -[Code Reference](https://github.com/arangodb/kube-arangodb/blob/1.2.35/pkg/apis/backup/v1/backup_state.go#L109) +[Code Reference](https://github.com/arangodb/kube-arangodb/blob/1.2.35/pkg/apis/backup/v1/backup_state.go#L111) ### .status.progress.progress: string @@ -214,7 +214,7 @@ Example: 90% ``` -[Code Reference](https://github.com/arangodb/kube-arangodb/blob/1.2.35/pkg/apis/backup/v1/backup_state.go#L112) +[Code Reference](https://github.com/arangodb/kube-arangodb/blob/1.2.35/pkg/apis/backup/v1/backup_state.go#L114) ### .status.state: string @@ -235,5 +235,5 @@ Possible Values: * Failed - state for failure * Unavailable - state when Backup is not available on the ArangoDB. It can happen in case of upgrades, node restarts etc. -[Code Reference](https://github.com/arangodb/kube-arangodb/blob/1.2.35/pkg/apis/backup/v1/backup_state.go#L80) +[Code Reference](https://github.com/arangodb/kube-arangodb/blob/1.2.35/pkg/apis/backup/v1/backup_state.go#L82) diff --git a/docs/api/ArangoBackupPolicy.V1.md b/docs/api/ArangoBackupPolicy.V1.md index f911fa043..7800e62c9 100644 --- a/docs/api/ArangoBackupPolicy.V1.md +++ b/docs/api/ArangoBackupPolicy.V1.md @@ -78,7 +78,7 @@ This field is **immutable**: can't be changed after backup creation ### .spec.template.options.timeout: number -Timeout for Backup creation request in seconds. +Timeout for Backup creation request in seconds. Works only when AsyncBackupCreation feature is set to false. 
Default Value: `30` diff --git a/docs/features/README.md b/docs/features/README.md index 7b14cda09..092638f62 100644 --- a/docs/features/README.md +++ b/docs/features/README.md @@ -1,5 +1,37 @@ ## List of Community Edition features +| Feature | Operator Version | Introduced | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks | +|:----------------------------------------------------------------|:-----------------|:-----------|:-----------------|:----------------------|:-------------|:--------|:------------------------------------------------------|:--------------------------------------------------------------------------------------| +| Create backups asynchronously | 1.2.35 | 1.2.35 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.async-backup-creation | Create backups asynchronously to avoid blocking the operator and reaching the timeout | +| Enforced ResignLeadership | 1.2.34 | 1.2.34 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.enforced-resign-leadership | Enforce ResignLeadership and ensure that Leaders are moved from restarted DBServer | +| Copy resources spec to init containers | 1.2.33 | 1.2.33 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.init-containers-copy-resources | Copy resources spec to built-in init containers if they are not specified | +| [Rebalancer V2](rebalancer_v2.md) | 1.2.31 | 1.2.31 | >= 3.10.0 | Community, Enterprise | Alpha | False | --deployment.feature.rebalancer-v2 | N/A | +| [Secured containers](secured_containers.md) | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.secured-containers | If set to True Operator will run containers in secure mode | +| Version Check V2 | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.upgrade-version-check-V2 | N/A | +| [Operator Ephemeral Volumes](ephemeral_volumes.md) | 1.2.31 | 1.2.2 | >= 3.8.0 | 
Community, Enterprise | Beta | False | --deployment.feature.ephemeral-volumes | N/A | +| [Force Rebuild Out Synced Shards](rebuild_out_synced_shards.md) | 1.2.27 | 1.2.27 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.force-rebuild-out-synced-shards | It should be used only if user is aware of the risks. | +| [Spec Default Restore](deployment_spec_defaults.md) | 1.2.25 | 1.2.21 | >= 3.8.0 | Community, Enterprise | Beta | True | --deployment.feature.deployment-spec-defaults-restore | If set to False Operator will not change ArangoDeployment Spec | +| Version Check | 1.2.23 | 1.1.4 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.upgrade-version-check | N/A | +| [Failover Leader service](failover_leader_service.md) | 1.2.13 | 1.2.13 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.failover-leadership | N/A | +| Graceful Restart | 1.2.5 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | ---deployment.feature.graceful-shutdown | N/A | +| Optional Graceful Restart | 1.2.0 | 1.2.5 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.optional-graceful-shutdown | N/A | +| Operator Internal Metrics Exporter | 1.2.0 | 1.2.0 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.metrics-exporter | N/A | +| Operator Maintenance Management Support | 1.2.0 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.maintenance | N/A | +| Encryption Key Rotation Support | 1.2.0 | 1.0.3 | >= 3.8.0 | Enterprise | NotSupported | False | --deployment.feature.encryption-rotation | N/A | +| TLS Runtime Rotation Support | 1.1.0 | 1.0.4 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-rotation | N/A | +| JWT Rotation Support | 1.1.0 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.jwt-rotation | N/A | +| Operator Single Mode | 1.0.4 | 1.0.4 | >= 3.8.0 | Community, 
Enterprise | Production | False | --mode.single | Only 1 instance of Operator allowed in namespace when feature is enabled | +| TLS SNI Support | 1.0.3 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-sni | N/A | +| Disabling of liveness probes | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | +| Pod Disruption Budgets | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | +| Prometheus Metrics Exporter | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | Prometheus required | +| Sidecar Containers | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | +| Volume Claim Templates | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | +| Volume Resizing | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | + + +## List of Enterprise Edition features + | Feature | Operator Version | Introduced | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks | |:-----------------------------------------------------|:-----------------|:-----------|:-----------------|:-----------------|:-----------|:--------|:-----|:----------------------------------------------------------------------------| | AgencyCache | 1.2.30 | 1.2.30 | >= 3.8.0 | Enterprise | Production | True | N/A | Enable Agency Cache mechanism in the Operator (Increase limit of the nodes) | @@ -8,34 +40,3 @@ | [TopologyAwareness](../design/topology_awareness.md) | 1.2.4 | 1.2.4 | >= 3.8.0 | Enterprise | Production | True | N/A | N/A | -## List of Enterprise Edition features - -| Feature | Operator Version | Introduced | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks | 
-|:----------------------------------------------------------------|:-----------------|:-----------|:-----------------|:----------------------|:-------------|:--------|:------------------------------------------------------|:-----------------------------------------------------------------------------------| -| Enforced ResignLeadership | 1.2.34 | 1.2.34 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.enforced-resign-leadership | Enforce ResignLeadership and ensure that Leaders are moved from restarted DBServer | -| Copy resources spec to init containers | 1.2.33 | 1.2.33 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.init-containers-copy-resources | Copy resources spec to built-in init containers if they are not specified | -| [Rebalancer V2](rebalancer_v2.md) | 1.2.31 | 1.2.31 | >= 3.10.0 | Community, Enterprise | Alpha | False | --deployment.feature.rebalancer-v2 | N/A | -| [Secured containers](secured_containers.md) | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.secured-containers | If set to True Operator will run containers in secure mode | -| Version Check V2 | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.upgrade-version-check-V2 | N/A | -| [Operator Ephemeral Volumes](ephemeral_volumes.md) | 1.2.31 | 1.2.2 | >= 3.8.0 | Community, Enterprise | Beta | False | --deployment.feature.ephemeral-volumes | N/A | -| [Force Rebuild Out Synced Shards](rebuild_out_synced_shards.md) | 1.2.27 | 1.2.27 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.force-rebuild-out-synced-shards | It should be used only if user is aware of the risks. 
| -| [Spec Default Restore](deployment_spec_defaults.md) | 1.2.25 | 1.2.21 | >= 3.8.0 | Community, Enterprise | Beta | True | --deployment.feature.deployment-spec-defaults-restore | If set to False Operator will not change ArangoDeployment Spec | -| Version Check | 1.2.23 | 1.1.4 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.upgrade-version-check | N/A | -| [Failover Leader service](failover_leader_service.md) | 1.2.13 | 1.2.13 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.failover-leadership | N/A | -| Graceful Restart | 1.2.5 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | ---deployment.feature.graceful-shutdown | N/A | -| Optional Graceful Restart | 1.2.0 | 1.2.5 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.optional-graceful-shutdown | N/A | -| Operator Internal Metrics Exporter | 1.2.0 | 1.2.0 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.metrics-exporter | N/A | -| Operator Maintenance Management Support | 1.2.0 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.maintenance | N/A | -| Encryption Key Rotation Support | 1.2.0 | 1.0.3 | >= 3.8.0 | Enterprise | NotSupported | False | --deployment.feature.encryption-rotation | N/A | -| TLS Runtime Rotation Support | 1.1.0 | 1.0.4 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-rotation | N/A | -| JWT Rotation Support | 1.1.0 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.jwt-rotation | N/A | -| Operator Single Mode | 1.0.4 | 1.0.4 | >= 3.8.0 | Community, Enterprise | Production | False | --mode.single | Only 1 instance of Operator allowed in namespace when feature is enabled | -| TLS SNI Support | 1.0.3 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-sni | N/A | -| Disabling of liveness probes | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | 
Production | True | N/A | N/A | -| Pod Disruption Budgets | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | -| Prometheus Metrics Exporter | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | Prometheus required | -| Sidecar Containers | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | -| Volume Claim Templates | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | -| Volume Resizing | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A | - - diff --git a/docs/features/rebalancer_v2.md b/docs/features/rebalancer_v2.md index b29fdda73..e970f76b3 100644 --- a/docs/features/rebalancer_v2.md +++ b/docs/features/rebalancer_v2.md @@ -2,9 +2,9 @@ ## Overview -ArangoDB as of 3.10.0 provide Cluster Rebalancer functionality via [api](https://www.arangodb.com/docs/stable/http/cluster.html#rebalance). +ArangoDB as of 3.10.0 provides Cluster Rebalancer functionality via [api](https://www.arangodb.com/docs/stable/http/cluster.html#rebalance). -Operator will use above functionality to check shard movement plan and enforce it on the Cluster. +Operator will use the above functionality to check shard movement plan and enforce it on the Cluster. 
## How to use diff --git a/go.mod b/go.mod index 625d4796a..b4a8c53dc 100644 --- a/go.mod +++ b/go.mod @@ -25,8 +25,8 @@ require ( github.com/arangodb-helper/go-certificates v0.0.0-20180821055445-9fca24fc2680 github.com/arangodb-helper/go-helper v0.4.2 github.com/arangodb/arangosync-client v0.9.0 - github.com/arangodb/go-driver v1.6.0 - github.com/arangodb/go-driver/v2 v2.0.0-20230616090327-3b9171814ae4 + github.com/arangodb/go-driver v1.6.1 + github.com/arangodb/go-driver/v2 v2.0.3 github.com/arangodb/go-upgrade-rules v0.0.0-20180809110947-031b4774ff21 //github.com/arangodb/rebalancer v0.1.1 //github.com/arangodb/go-agency-helper v0.3.0 @@ -130,4 +130,6 @@ require ( k8s.io/utils v0.0.0-20220728103510-ee6ede2d64ed // indirect sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect + github.com/dchest/siphash v1.2.2 // indirect + github.com/kkdai/maglev v0.2.0 // indirect ) diff --git a/go.sum b/go.sum index 6b5214566..d49c148f7 100644 --- a/go.sum +++ b/go.sum @@ -81,8 +81,13 @@ github.com/arangodb/go-agency-helper v0.4.0/go.mod h1:IMzQ1JilLu764DgFQ1qh21jPEz github.com/arangodb/go-driver v1.2.1/go.mod h1:zdDkJJnCj8DAkfbtIjIXnsTrWIiy6VhP3Vy14p+uQeY= github.com/arangodb/go-driver v1.6.0 h1:NFWj/idqXZxhFVueihMSI2R9NotNIsgvNfM/xmpekb4= github.com/arangodb/go-driver v1.6.0/go.mod h1:HQmdGkvNMVBTE3SIPSQ8T/ZddC6iwNsfMR+dDJQxIsI= +github.com/arangodb/go-driver v1.6.1-0.20231025111232-5473b2e39799/go.mod h1:F1fFEduYiEUxeb+JioB6aMqPt0CRUcgtSwGZHhNeeLg= +github.com/arangodb/go-driver v1.6.1 h1:bnhrpbA4U1NU13JOWs5sWWYMtQwdjKT0+jkl8dSndyY= +github.com/arangodb/go-driver v1.6.1/go.mod h1:ywucwwi34GBxxXFWw/ym+7/66//L4K9abxk/sFJro2k= github.com/arangodb/go-driver/v2 v2.0.0-20230616090327-3b9171814ae4 h1:LpIIPBcrbZ/mVDG2ioZN92Pbgb5HQ2Vnqj/UaQAyN4E= github.com/arangodb/go-driver/v2 v2.0.0-20230616090327-3b9171814ae4/go.mod h1:bnCd24JXOgd5rpG8uwSpHPsF6NCcH4m6rKtKh1EJ7Lo= +github.com/arangodb/go-driver/v2 v2.0.3 
h1:B/tKSgf4KSiLN0biqcH8Tm/Dj8nZdP5Lia2/xdy8KD0= +github.com/arangodb/go-driver/v2 v2.0.3/go.mod h1:iibpBwIQbE4uejDFCvLqwCVfgE72F51ZEehPme+BAug= github.com/arangodb/go-upgrade-rules v0.0.0-20180809110947-031b4774ff21 h1:+W7D5ttxi/Ygh/39vialtypE23p9KI7P0J2qtoqUV4w= github.com/arangodb/go-upgrade-rules v0.0.0-20180809110947-031b4774ff21/go.mod h1:RkPIG6JJ2pcJUoymc18NxAJGraZd+iAEVnOTDjZey/w= github.com/arangodb/go-velocypack v0.0.0-20200318135517-5af53c29c67e h1:Xg+hGrY2LcQBbxd0ZFdbGSyRKTYMZCfBbw/pMJFOk1g= @@ -155,6 +160,8 @@ github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dchest/siphash v1.2.2 h1:9DFz8tQwl9pTVt5iok/9zKyzA1Q6bRGiF3HPiEEVr9I= +github.com/dchest/siphash v1.2.2/go.mod h1:q+IRvb2gOSrUnYoPqHiyHXS0FOBBOdl6tONBlVnOnt4= github.com/dchest/uniuri v0.0.0-20160212164326-8902c56451e9 h1:74lLNRzvsdIlkTgfDSMuaPjBr4cf6k7pwQQANm/yLKU= github.com/dchest/uniuri v0.0.0-20160212164326-8902c56451e9/go.mod h1:GgB8SF9nRG+GqaDtLcwJZsQFhcogVCJ79j4EdT0c2V4= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= @@ -199,6 +206,7 @@ github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vb github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= 
github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -384,6 +392,8 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kkdai/maglev v0.2.0 h1:w6DCW0kAA6fstZqXkrBrlgIC3jeIRXkjOYea/m6EK/Y= +github.com/kkdai/maglev v0.2.0/go.mod h1:d+mt8Lmt3uqi9aRb/BnPjzD0fy+ETs1vVXiGRnqHVZ4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= diff --git a/internal/features.yaml b/internal/features.yaml index 8bc50a283..90b06454d 100644 --- a/internal/features.yaml +++ b/internal/features.yaml @@ -227,3 +227,10 @@ features: releases: - operatorVersion: 1.2.34 state: Production + - name: Create backups asynchronously + enabled: false + remarks: Create backups asynchronously to avoid blocking the operator and reaching the timeout + flag: --deployment.feature.async-backup-creation + releases: + - operatorVersion: 1.2.35 + state: Production diff --git a/internal/features_test.go b/internal/features_test.go index d203645f1..3e8564609 100644 --- a/internal/features_test.go +++ b/internal/features_test.go @@ -42,13 +42,13 @@ func Test_GenerateFeaturesIndex(t *testing.T) { const basePath = "docs/features" write(t, out, "## List of Community Edition features\n") - section, err := GenerateReadmeFeatures(root, basePath, true) + section, err := GenerateReadmeFeatures(root, basePath, false) require.NoError(t, err) write(t, out, section) write(t, out, "\n") write(t, out, "## List of Enterprise Edition features\n") - section, err = 
GenerateReadmeFeatures(root, basePath, false) + section, err = GenerateReadmeFeatures(root, basePath, true) require.NoError(t, err) write(t, out, section) write(t, out, "\n") diff --git a/pkg/apis/backup/v1/backup_spec.go b/pkg/apis/backup/v1/backup_spec.go index c21be357b..3d3753a6c 100644 --- a/pkg/apis/backup/v1/backup_spec.go +++ b/pkg/apis/backup/v1/backup_spec.go @@ -55,7 +55,7 @@ type ArangoBackupSpecDeployment struct { } type ArangoBackupSpecOptions struct { - // Timeout for Backup creation request in seconds. + // Timeout for Backup creation request in seconds. Works only when AsyncBackupCreation feature is set to false. // +doc/immutable: can't be changed after backup creation // +doc/default: 30 Timeout *float32 `json:"timeout,omitempty"` diff --git a/pkg/apis/backup/v1/backup_state.go b/pkg/apis/backup/v1/backup_state.go index 03fc2d35f..a37933675 100644 --- a/pkg/apis/backup/v1/backup_state.go +++ b/pkg/apis/backup/v1/backup_state.go @@ -34,6 +34,7 @@ const ( ArangoBackupStateDownloadError state.State = "DownloadError" ArangoBackupStateDownloading state.State = "Downloading" ArangoBackupStateCreate state.State = "Create" + ArangoBackupStateCreating state.State = "Creating" ArangoBackupStateCreateError state.State = "CreateError" ArangoBackupStateUpload state.State = "Upload" ArangoBackupStateUploading state.State = "Uploading" @@ -51,7 +52,8 @@ var ArangoBackupStateMap = state.Map{ ArangoBackupStateDownload: {ArangoBackupStateDownloading, ArangoBackupStateFailed, ArangoBackupStateDownloadError}, ArangoBackupStateDownloading: {ArangoBackupStateReady, ArangoBackupStateFailed, ArangoBackupStateDownloadError}, ArangoBackupStateDownloadError: {ArangoBackupStatePending, ArangoBackupStateFailed}, - ArangoBackupStateCreate: {ArangoBackupStateReady, ArangoBackupStateFailed, ArangoBackupStateCreateError}, + ArangoBackupStateCreate: {ArangoBackupStateReady, ArangoBackupStateFailed, ArangoBackupStateCreateError, ArangoBackupStateCreating}, + 
ArangoBackupStateCreating: {ArangoBackupStateReady, ArangoBackupStateFailed, ArangoBackupStateCreateError}, ArangoBackupStateCreateError: {ArangoBackupStateFailed, ArangoBackupStateCreate}, ArangoBackupStateUpload: {ArangoBackupStateUploading, ArangoBackupStateFailed, ArangoBackupStateDeleted, ArangoBackupStateUploadError}, ArangoBackupStateUploading: {ArangoBackupStateReady, ArangoBackupStateFailed, ArangoBackupStateUploadError}, diff --git a/pkg/deployment/features/backup_async.go b/pkg/deployment/features/backup_async.go new file mode 100644 index 000000000..ae45aa920 --- /dev/null +++ b/pkg/deployment/features/backup_async.go @@ -0,0 +1,38 @@ +// +// DISCLAIMER +// +// Copyright 2023 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package features + +func init() { + registerFeature(asyncBackupCreation) +} + +var asyncBackupCreation = &feature{ + name: "async-backup-creation", + description: "Create backups asynchronously to avoid blocking the operator and reaching the timeout", + version: "3.7.0", + enterpriseRequired: false, + enabledByDefault: false, +} + +// AsyncBackupCreation returns mode for backup creation (sync/async). 
+func AsyncBackupCreation() Feature { + return asyncBackupCreation +} diff --git a/pkg/deployment/images.go b/pkg/deployment/images.go index e2c9eabe1..386ccb34f 100644 --- a/pkg/deployment/images.go +++ b/pkg/deployment/images.go @@ -175,7 +175,7 @@ func (ib *imagesBuilder) fetchArangoDBImageIDAndVersion(ctx context.Context, cac } // Try fetching the ArangoDB version - client, err := arangod.CreateArangodImageIDClient(ctx, ib.APIObject, pod.Status.PodIP) + client, err := arangod.CreateArangodImageIDClient(ctx, ib.APIObject, pod.Status.PodIP, false) if err != nil { log.Err(err).Warn("Failed to create Image ID Pod client") return true, nil diff --git a/pkg/deployment/reconcile/action_backup_restore.go b/pkg/deployment/reconcile/action_backup_restore.go index 7d80bf86b..9505c6be5 100644 --- a/pkg/deployment/reconcile/action_backup_restore.go +++ b/pkg/deployment/reconcile/action_backup_restore.go @@ -117,7 +117,7 @@ func (a actionBackupRestore) restoreAsync(ctx context.Context, backup *backupApi a.actionCtx.Add(actionBackupRestoreLocalJobID, id, true) a.actionCtx.Add(actionBackupRestoreLocalBackupName, backup.GetName(), true) - // Async request has been send + // Async request has been sent return false, nil } else { return false, errors.Wrapf(err, "Unknown restore error") diff --git a/pkg/handlers/backup/arango_client.go b/pkg/handlers/backup/arango_client.go index 5fc4c1951..a99bee6ed 100644 --- a/pkg/handlers/backup/arango_client.go +++ b/pkg/handlers/backup/arango_client.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -59,6 +59,11 @@ type ArangoBackupCreateResponse struct { // ArangoBackupClient interface with backup functionality for database type ArangoBackupClient interface { Create() (ArangoBackupCreateResponse, error) + + // CreateAsync creates a new backup asynchronously; while the job is in progress, the job ID is returned in the error. + // Pass an empty jobID to start a new backup, or the ID of an existing job to check that job. + CreateAsync(jobID string) (ArangoBackupCreateResponse, error) + Get(driver.BackupID) (driver.BackupMeta, error) Upload(driver.BackupID) (driver.BackupTransferJobID, error) diff --git a/pkg/handlers/backup/arango_client_impl.go b/pkg/handlers/backup/arango_client_impl.go index b6fef08ee..06ca093a7 100644 --- a/pkg/handlers/backup/arango_client_impl.go +++ b/pkg/handlers/backup/arango_client_impl.go @@ -48,7 +48,7 @@ type arangoClientBackupImpl struct { func newArangoClientBackupFactory(handler *handler) ArangoClientFactory { return func(deployment *database.ArangoDeployment, backup *backupApi.ArangoBackup) (ArangoBackupClient, error) { ctx := context.Background() - client, err := arangod.CreateArangodDatabaseClient(ctx, handler.kubeClient.CoreV1(), deployment, false) + client, err := arangod.CreateArangodDatabaseClient(ctx, handler.kubeClient.CoreV1(), deployment, false, true) if err != nil { return nil, err } @@ -109,6 +109,43 @@ func (ac *arangoClientBackupImpl) Create() (ArangoBackupCreateResponse, error) { }, nil } +func (ac *arangoClientBackupImpl) CreateAsync(jobID string) (ArangoBackupCreateResponse, error) { + dt := globals.GetGlobalTimeouts().BackupArangoClientTimeout().Get() + + co := driver.BackupCreateOptions{} + + if opt := ac.backup.Spec.Options; opt != nil { + if allowInconsistent := opt.AllowInconsistent; allowInconsistent != nil { + co.AllowInconsistent = *allowInconsistent + } + } + + ctx, cancel := context.WithTimeout(context.Background(), dt) + defer cancel() + + if jobID == "" { + ctx = driver.WithAsync(ctx) + } else { + ctx = driver.WithAsyncID(ctx, jobID) + } + + id, resp, err :=
ac.driver.Backup().Create(ctx, &co) + if err != nil { + return ArangoBackupCreateResponse{}, err + } + + // Now fetch the metadata of the created backup + meta, err := ac.Get(id) + if err != nil { + return ArangoBackupCreateResponse{}, err + } + + return ArangoBackupCreateResponse{ + PotentiallyInconsistent: resp.PotentiallyInconsistent, + BackupMeta: meta, + }, nil +} + func (ac *arangoClientBackupImpl) Get(backupID driver.BackupID) (driver.BackupMeta, error) { ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background()) defer cancel() @@ -217,11 +254,11 @@ func (ac *arangoClientBackupImpl) Progress(jobID driver.BackupTransferJobID) (Ar case "": completedCount++ default: - return ArangoBackupProgress{}, errors.Newf("Unknown transfere status: %s", status.Status) + return ArangoBackupProgress{}, errors.Newf("Unknown transfer status: %s", status.Status) } } - // Check if all defined servers are completed and total number of files is greater than 0 (there is at least 1 file per server) + // Check if all defined servers are completed and the total number of files is greater than 0 (there is at least 1 file per server) ret.Completed = completedCount == len(report.DBServers) && total > 0 if total != 0 { ret.Progress = (100 * done) / total diff --git a/pkg/handlers/backup/arango_client_mock_test.go b/pkg/handlers/backup/arango_client_mock_test.go index 4fcab27e7..a1b4ca664 100644 --- a/pkg/handlers/backup/arango_client_mock_test.go +++ b/pkg/handlers/backup/arango_client_mock_test.go @@ -21,12 +21,14 @@ package backup import ( + "strconv" "sync" "time" "k8s.io/apimachinery/pkg/util/uuid" "github.com/arangodb/go-driver" + "github.com/arangodb/go-driver/util/connection/wrappers/async" backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1" database "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" @@ -70,6 +72,7 @@ type mockArangoClientBackupState struct { backups map[driver.BackupID]driver.BackupMeta progresses
map[driver.BackupTransferJobID]ArangoBackupProgress + createDone bool errors mockErrorsArangoClientBackup } @@ -227,6 +230,21 @@ func (m *mockArangoClientBackup) Create() (ArangoBackupCreateResponse, error) { }, nil } +func (m *mockArangoClientBackup) CreateAsync(jobID string) (ArangoBackupCreateResponse, error) { + if m.state.errors.createError != nil { + return ArangoBackupCreateResponse{}, m.state.errors.createError + } + + if m.state.createDone { + return m.Create() + } + + if jobID == "" { + return ArangoBackupCreateResponse{}, async.NewErrorAsyncJobInProgress(strconv.Itoa(util.Rand().Int())) + } + return ArangoBackupCreateResponse{}, async.NewErrorAsyncJobInProgress(jobID) +} + func (m *mockArangoClientBackup) getIDs() []string { ret := make([]string, 0, len(m.state.backups)) diff --git a/pkg/handlers/backup/backup_suite_test.go b/pkg/handlers/backup/backup_suite_test.go index 7e350c35e..d10fdd8b6 100644 --- a/pkg/handlers/backup/backup_suite_test.go +++ b/pkg/handlers/backup/backup_suite_test.go @@ -75,9 +75,9 @@ func newObjectSet(state state.State) (*backupApi.ArangoBackup, *database.ArangoD namespace := string(uuid.NewUUID()) obj := newArangoBackup(name, namespace, name, state) - deployment := newArangoDeployment(namespace, name) + arangoDeployment := newArangoDeployment(namespace, name) - return obj, deployment + return obj, arangoDeployment } func newItem(o operation.Operation, namespace, name string) operation.Item { diff --git a/pkg/handlers/backup/handler.go b/pkg/handlers/backup/handler.go index 05a7f9eed..3831f481d 100644 --- a/pkg/handlers/backup/handler.go +++ b/pkg/handlers/backup/handler.go @@ -22,6 +22,7 @@ package backup import ( "context" + "errors" "fmt" "sync" "time" @@ -44,7 +45,7 @@ import ( "github.com/arangodb/kube-arangodb/pkg/operatorV2/event" "github.com/arangodb/kube-arangodb/pkg/operatorV2/operation" "github.com/arangodb/kube-arangodb/pkg/util" - "github.com/arangodb/kube-arangodb/pkg/util/errors" + adbErrors 
"github.com/arangodb/kube-arangodb/pkg/util/errors" ) var logger = logging.Global().RegisterAndGetLogger("backup-operator", logging.Info) @@ -226,7 +227,7 @@ func (h *handler) getDeploymentMutex(namespace, deployment string) *sync.Mutex { } func (h *handler) Handle(item operation.Item) error { - // Get Backup object. It also cover NotFound case + // Get Backup object. It also covers NotFound case b, err := h.client.BackupV1().ArangoBackups(item.Namespace).Get(context.Background(), item.Name, meta.GetOptions{}) if err != nil { if apiErrors.IsNotFound(err) { @@ -261,7 +262,7 @@ func (h *handler) Handle(item operation.Item) error { return nil } - // Create lock per namespace to ensure that we are not using 2 goroutines in same time + // Create lock per deployment to ensure that we are not using two goroutines at the same time lock := h.getDeploymentMutex(b.Namespace, b.Spec.Deployment.Name) lock.Lock() defer lock.Unlock() @@ -298,7 +299,8 @@ func (h *handler) Handle(item operation.Item) error { cError := switchError(err) - if _, ok := cError.(temporaryError); ok { + var temporaryError temporaryError + if errors.As(cError, &temporaryError) { return cError } @@ -368,7 +370,7 @@ func (h *handler) processArangoBackup(backup *backupApi.ArangoBackup) (*backupAp return f(h, backup) } - return nil, errors.Newf("state %s is not supported", backup.Status.State) + return nil, adbErrors.Newf("state %s is not supported", backup.Status.State) } func (h *handler) CanBeHandled(item operation.Item) bool { diff --git a/pkg/handlers/backup/state.go b/pkg/handlers/backup/state.go index 979c254d3..ce64b928f 100644 --- a/pkg/handlers/backup/state.go +++ b/pkg/handlers/backup/state.go @@ -33,6 +33,7 @@ var ( backupApi.ArangoBackupStatePending: statePendingHandler, backupApi.ArangoBackupStateScheduled: stateScheduledHandler, backupApi.ArangoBackupStateCreate: stateCreateHandler, + backupApi.ArangoBackupStateCreating: stateCreatingHandler, backupApi.ArangoBackupStateCreateError:
stateCreateErrorHandler, backupApi.ArangoBackupStateUpload: stateUploadHandler, backupApi.ArangoBackupStateUploading: stateUploadingHandler, diff --git a/pkg/handlers/backup/state_create.go b/pkg/handlers/backup/state_create.go index 13e22a31c..9c11288aa 100644 --- a/pkg/handlers/backup/state_create.go +++ b/pkg/handlers/backup/state_create.go @@ -22,8 +22,10 @@ package backup import ( "github.com/arangodb/go-driver" + "github.com/arangodb/go-driver/util/connection/wrappers/async" backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1" + "github.com/arangodb/kube-arangodb/pkg/deployment/features" ) func stateCreateHandler(h *handler, backup *backupApi.ArangoBackup) (*backupApi.ArangoBackupStatus, error) { @@ -37,6 +39,14 @@ func stateCreateHandler(h *handler, backup *backupApi.ArangoBackup) (*backupApi. return nil, newTemporaryError(err) } + if features.AsyncBackupCreation().Enabled() { + return asyncBackup(client, backup) + } else { + return syncBackup(client, backup) + } +} + +func syncBackup(client ArangoBackupClient, backup *backupApi.ArangoBackup) (*backupApi.ArangoBackupStatus, error) { response, err := client.Create() if err != nil { return wrapUpdateStatus(backup, @@ -51,8 +61,7 @@ func stateCreateHandler(h *handler, backup *backupApi.ArangoBackup) (*backupApi. if err != nil { if driver.IsNotFoundGeneral(err) { return wrapUpdateStatus(backup, - updateStatusState(backupApi.ArangoBackupStateFailed, - "backup is not present after creation"), + updateStatusState(backupApi.ArangoBackupStateFailed, "backup is not present after creation"), cleanStatusJob(), ) } @@ -67,3 +76,32 @@ func stateCreateHandler(h *handler, backup *backupApi.ArangoBackup) (*backupApi. 
cleanBackOff(), ) } + +func asyncBackup(client ArangoBackupClient, backup *backupApi.ArangoBackup) (*backupApi.ArangoBackupStatus, error) { + _, err := client.CreateAsync("") + + if err == nil { + return wrapUpdateStatus(backup, + updateStatusState(backupApi.ArangoBackupStateCreateError, "Start Async backup failed - no jobID response"), + cleanStatusJob(), + updateStatusAvailable(false), + addBackOff(backup.Spec), + ) + } + + jobID, isAsyncId := async.IsAsyncJobInProgress(err) + if !isAsyncId { + return wrapUpdateStatus(backup, + updateStatusState(backupApi.ArangoBackupStateCreateError, "Create backup failed with error: %s", err.Error()), + cleanStatusJob(), + updateStatusAvailable(false), + addBackOff(backup.Spec), + ) + } + + return wrapUpdateStatus(backup, + updateStatusState(backupApi.ArangoBackupStateCreating, ""), + updateStatusJob(jobID, "0%"), + updateStatusAvailable(false), + ) +} diff --git a/pkg/handlers/backup/state_create_test.go b/pkg/handlers/backup/state_create_test.go index 417a8bcf4..72a3c452f 100644 --- a/pkg/handlers/backup/state_create_test.go +++ b/pkg/handlers/backup/state_create_test.go @@ -28,16 +28,25 @@ import ( "github.com/arangodb/go-driver" backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1" + "github.com/arangodb/kube-arangodb/pkg/deployment/features" "github.com/arangodb/kube-arangodb/pkg/operatorV2/operation" "github.com/arangodb/kube-arangodb/pkg/util" ) func Test_State_Create_Common(t *testing.T) { + *features.AsyncBackupCreation().EnabledPointer() = false + wrapperUndefinedDeployment(t, backupApi.ArangoBackupStateCreate) + wrapperConnectionIssues(t, backupApi.ArangoBackupStateCreate) +} + +func Test_State_Create_Common_Async(t *testing.T) { + *features.AsyncBackupCreation().EnabledPointer() = true wrapperUndefinedDeployment(t, backupApi.ArangoBackupStateCreate) wrapperConnectionIssues(t, backupApi.ArangoBackupStateCreate) } func Test_State_Create_Success(t *testing.T) { + 
*features.AsyncBackupCreation().EnabledPointer() = false // Arrange handler, mock := newErrorsFakeHandler(mockErrorsArangoClientBackup{}) @@ -62,7 +71,27 @@ func Test_State_Create_Success(t *testing.T) { compareBackupMeta(t, backupMeta, newObj) } +func Test_State_Create_Success_Async(t *testing.T) { + *features.AsyncBackupCreation().EnabledPointer() = true + // Arrange + handler, _ := newErrorsFakeHandler(mockErrorsArangoClientBackup{}) + + obj, deployment := newObjectSet(backupApi.ArangoBackupStateCreate) + + // Act + createArangoDeployment(t, handler, deployment) + createArangoBackup(t, handler, obj) + + require.NoError(t, handler.Handle(newItemFromBackup(operation.Update, obj))) + + // Assert + newObj := refreshArangoBackup(t, handler, obj) + checkBackup(t, newObj, backupApi.ArangoBackupStateCreating, false) +} + func Test_State_Create_SuccessForced(t *testing.T) { + *features.AsyncBackupCreation().EnabledPointer() = false + // Arrange handler, mock := newErrorsFakeHandler(mockErrorsArangoClientBackup{}) @@ -93,6 +122,8 @@ func Test_State_Create_SuccessForced(t *testing.T) { } func Test_State_Create_Upload(t *testing.T) { + *features.AsyncBackupCreation().EnabledPointer() = false + // Arrange handler, mock := newErrorsFakeHandler(mockErrorsArangoClientBackup{}) @@ -121,10 +152,34 @@ func Test_State_Create_Upload(t *testing.T) { } func Test_State_Create_CreateError(t *testing.T) { + *features.AsyncBackupCreation().EnabledPointer() = false + // Arrange - error := newFatalErrorf("error") handler, _ := newErrorsFakeHandler(mockErrorsArangoClientBackup{ - createError: error, + createError: newFatalErrorf("error"), + }) + + obj, deployment := newObjectSet(backupApi.ArangoBackupStateCreate) + + // Act + createArangoDeployment(t, handler, deployment) + createArangoBackup(t, handler, obj) + + require.NoError(t, handler.Handle(newItemFromBackup(operation.Update, obj))) + + // Assert + newObj := refreshArangoBackup(t, handler, obj) + require.Equal(t, newObj.Status.State, 
backupApi.ArangoBackupStateCreateError) + require.Nil(t, newObj.Status.Backup) + require.False(t, newObj.Status.Available) +} + +func Test_State_Create_CreateError_Async(t *testing.T) { + *features.AsyncBackupCreation().EnabledPointer() = true + + // Arrange + handler, _ := newErrorsFakeHandler(mockErrorsArangoClientBackup{ + createError: newFatalErrorf("error"), }) obj, deployment := newObjectSet(backupApi.ArangoBackupStateCreate) diff --git a/pkg/handlers/backup/state_creating.go b/pkg/handlers/backup/state_creating.go new file mode 100644 index 000000000..56eb835b6 --- /dev/null +++ b/pkg/handlers/backup/state_creating.go @@ -0,0 +1,83 @@ +// +// DISCLAIMER +// +// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package backup + +import ( + "github.com/arangodb/go-driver" + "github.com/arangodb/go-driver/util/connection/wrappers/async" + + backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1" +) + +func stateCreatingHandler(h *handler, backup *backupApi.ArangoBackup) (*backupApi.ArangoBackupStatus, error) { + deployment, err := h.getArangoDeploymentObject(backup) + if err != nil { + return nil, err + } + + client, err := h.arangoClientFactory(deployment, backup) + if err != nil { + return nil, newTemporaryError(err) + } + + if backup.Status.Progress == nil { + return nil, newFatalErrorf("missing field .status.progress") + } + + response, err := client.CreateAsync(backup.Status.Progress.JobID) + if err != nil { + _, isAsyncId := async.IsAsyncJobInProgress(err) + if isAsyncId { + return wrapUpdateStatus(backup, + updateStatusState(backupApi.ArangoBackupStateCreating, ""), + updateStatusAvailable(false), + updateStatusJob(backup.Status.Progress.JobID, "50%"), + ) + } + + return wrapUpdateStatus(backup, + updateStatusState(backupApi.ArangoBackupStateCreateError, "Create backup failed with error: %s", err), + cleanStatusJob(), + updateStatusAvailable(false), + addBackOff(backup.Spec), + ) + } + + backupMeta, err := client.Get(response.ID) + if err != nil { + if driver.IsNotFoundGeneral(err) { + return wrapUpdateStatus(backup, + updateStatusState(backupApi.ArangoBackupStateFailed, "backup is not present after creation"), + cleanStatusJob(), + ) + } + + return nil, newFatalError(err) + } + + return wrapUpdateStatus(backup, + updateStatusState(backupApi.ArangoBackupStateReady, ""), + cleanStatusJob(), + updateStatusAvailable(true), + updateStatusBackup(backupMeta), + cleanBackOff(), + ) +} diff --git a/pkg/handlers/backup/state_creating_test.go b/pkg/handlers/backup/state_creating_test.go new file mode 100644 index 000000000..6cbbcc437 --- /dev/null +++ b/pkg/handlers/backup/state_creating_test.go @@ 
-0,0 +1,119 @@ +// +// DISCLAIMER +// +// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// + +package backup + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/arangodb/go-driver" + + backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1" + "github.com/arangodb/kube-arangodb/pkg/operatorV2/operation" +) + +func Test_State_Creating_Common(t *testing.T) { + wrapperUndefinedDeployment(t, backupApi.ArangoBackupStateCreating) + wrapperConnectionIssues(t, backupApi.ArangoBackupStateCreating) +} + +func Test_State_Creating_Success(t *testing.T) { + // Arrange + handler, mock := newErrorsFakeHandler(mockErrorsArangoClientBackup{}) + + obj, deployment := newObjectSet(backupApi.ArangoBackupStateCreating) + + obj.Status.Progress = &backupApi.ArangoBackupProgress{ + JobID: "jobID", + } + + // Act + createArangoDeployment(t, handler, deployment) + createArangoBackup(t, handler, obj) + + t.Run("Create in progress, then done", func(t *testing.T) { + require.NoError(t, handler.Handle(newItemFromBackup(operation.Update, obj))) + + // Assert + newObj := refreshArangoBackup(t, handler, obj) + checkBackup(t, newObj, backupApi.ArangoBackupStateCreating, false) + + require.NotNil(t, newObj.Status.Progress) + + require.Equal(t, fmt.Sprintf("%d%%", 50), newObj.Status.Progress.Progress) + require.Equal(t, 
obj.Status.Progress.JobID, newObj.Status.Progress.JobID) + + mock.state.createDone = true + require.NoError(t, handler.Handle(newItemFromBackup(operation.Update, obj))) + + // Assert + newObj = refreshArangoBackup(t, handler, obj) + checkBackup(t, newObj, backupApi.ArangoBackupStateReady, true) + require.Nil(t, newObj.Status.Progress) + + backups := mock.getIDs() + require.Len(t, backups, 1) + + backupMeta, err := mock.Get(driver.BackupID(backups[0])) + require.NoError(t, err) + + compareBackupMeta(t, backupMeta, newObj) + + }) +} + +func Test_State_Creating_Failed(t *testing.T) { + // Arrange + handler, _ := newErrorsFakeHandler(mockErrorsArangoClientBackup{ + createError: driver.ArangoError{ + Code: 400, + }, + }) + + obj, deployment := newObjectSet(backupApi.ArangoBackupStateCreating) + + obj.Status.Progress = &backupApi.ArangoBackupProgress{ + JobID: "jobID", + } + + // Act + createArangoDeployment(t, handler, deployment) + createArangoBackup(t, handler, obj) + + t.Run("Create Backup returns error", func(t *testing.T) { + require.NoError(t, handler.Handle(newItemFromBackup(operation.Update, obj))) + + // Create error state should be set + newObj := refreshArangoBackup(t, handler, obj) + checkBackup(t, newObj, backupApi.ArangoBackupStateCreateError, false) + require.Nil(t, newObj.Status.Progress) + + require.NoError(t, handler.Handle(newItemFromBackup(operation.Update, obj))) + + // No retry - state should change to failed + newObj = refreshArangoBackup(t, handler, obj) + checkBackup(t, newObj, backupApi.ArangoBackupStateFailed, false) + require.Nil(t, newObj.Status.Progress) + + }) +} diff --git a/pkg/util/arangod/client.go b/pkg/util/arangod/client.go index 9c6faace0..567e5b4bc 100644 --- a/pkg/util/arangod/client.go +++ b/pkg/util/arangod/client.go @@ -1,7 +1,7 @@ // // DISCLAIMER // -// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany +// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany // // Licensed under the Apache License, Version 2.0 (the 
"License"); // you may not use this file except in compliance with the License. @@ -30,9 +30,10 @@ import ( typedCore "k8s.io/client-go/kubernetes/typed/core/v1" - driver "github.com/arangodb/go-driver" + "github.com/arangodb/go-driver" "github.com/arangodb/go-driver/http" "github.com/arangodb/go-driver/jwt" + "github.com/arangodb/go-driver/util/connection/wrappers/async" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" "github.com/arangodb/kube-arangodb/pkg/apis/shared" @@ -114,10 +115,10 @@ var ( ) // CreateArangodClient creates a go-driver client for a specific member in the given group. -func CreateArangodClient(ctx context.Context, cli typedCore.CoreV1Interface, apiObject *api.ArangoDeployment, group api.ServerGroup, id string) (driver.Client, error) { +func CreateArangodClient(ctx context.Context, cli typedCore.CoreV1Interface, apiObject *api.ArangoDeployment, group api.ServerGroup, id string, asyncSupport bool) (driver.Client, error) { // Create connection dnsName := k8sutil.CreatePodDNSNameWithDomain(apiObject, apiObject.GetAcceptedSpec().ClusterDomain, group.AsRole(), id) - c, err := createArangodClientForDNSName(ctx, cli, apiObject, dnsName, false) + c, err := createArangodClientForDNSName(ctx, cli, apiObject, dnsName, false, asyncSupport) if err != nil { return nil, errors.WithStack(err) } @@ -125,10 +126,10 @@ func CreateArangodClient(ctx context.Context, cli typedCore.CoreV1Interface, api } // CreateArangodDatabaseClient creates a go-driver client for accessing the entire cluster (or single server). 
-func CreateArangodDatabaseClient(ctx context.Context, cli typedCore.CoreV1Interface, apiObject *api.ArangoDeployment, shortTimeout bool) (driver.Client, error) { +func CreateArangodDatabaseClient(ctx context.Context, cli typedCore.CoreV1Interface, apiObject *api.ArangoDeployment, shortTimeout bool, asyncSupport bool) (driver.Client, error) { // Create connection dnsName := k8sutil.CreateDatabaseClientServiceDNSNameWithDomain(apiObject, apiObject.GetAcceptedSpec().ClusterDomain) - c, err := createArangodClientForDNSName(ctx, cli, apiObject, dnsName, shortTimeout) + c, err := createArangodClientForDNSName(ctx, cli, apiObject, dnsName, shortTimeout, asyncSupport) if err != nil { return nil, errors.WithStack(err) } @@ -137,9 +138,9 @@ func CreateArangodDatabaseClient(ctx context.Context, cli typedCore.CoreV1Interf // CreateArangodImageIDClient creates a go-driver client for an ArangoDB instance // running in an Image-ID pod. -func CreateArangodImageIDClient(ctx context.Context, deployment k8sutil.APIObject, ip string) (driver.Client, error) { +func CreateArangodImageIDClient(ctx context.Context, deployment k8sutil.APIObject, ip string, asyncSupport bool) (driver.Client, error) { // Create connection - c, err := createArangodClientForDNSName(ctx, nil, nil, ip, false) + c, err := createArangodClientForDNSName(ctx, nil, nil, ip, false, asyncSupport) if err != nil { return nil, errors.WithStack(err) } @@ -147,7 +148,7 @@ func CreateArangodImageIDClient(ctx context.Context, deployment k8sutil.APIObjec } // CreateArangodClientForDNSName creates a go-driver client for a given DNS name. 
-func createArangodClientForDNSName(ctx context.Context, cli typedCore.CoreV1Interface, apiObject *api.ArangoDeployment, dnsName string, shortTimeout bool) (driver.Client, error) { +func createArangodClientForDNSName(ctx context.Context, cli typedCore.CoreV1Interface, apiObject *api.ArangoDeployment, dnsName string, shortTimeout bool, asyncSupport bool) (driver.Client, error) { connConfig := createArangodHTTPConfigForDNSNames(apiObject, []string{dnsName}, shortTimeout) // TODO deal with TLS with proper CA checking conn, err := http.NewConnection(connConfig) @@ -155,6 +156,11 @@ func createArangodClientForDNSName(ctx context.Context, cli typedCore.CoreV1Inte return nil, errors.WithStack(err) } + if asyncSupport { + // Wrap connection with async wrapper + conn = async.NewConnectionAsyncWrapper(conn) + } + // Create client config := driver.ClientConfig{ Connection: conn,