
Remove obsolete docs, restructure for better UX (#1447)

Nikita Vaniasin 2023-10-19 09:14:14 +02:00 committed by GitHub
parent 55634ba323
commit 6f6fcb8ba4
47 changed files with 285 additions and 1418 deletions

@@ -3,8 +3,9 @@
## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A)
- (Maintenance) Update go-driver to v1.6.0, update IsNotFound() checks
- (Improvement) Print assigned node name to log and condition message when pod is scheduled
- (Maintenance) Remove obsolete docs, restructure for better UX, generate index files
## [1.2.34](https://github.com/arangodb/kube-arangodb/tree/1.2.34) (2023-10-16
## [1.2.34](https://github.com/arangodb/kube-arangodb/tree/1.2.34) (2023-10-16)
- (Bugfix) Fix make manifests-crd-file command
- (Improvement) Allow tcp:// and ssl:// protocols in endpoints for members
- (Maintenance) Reorganize package imports / move common code to separate repos

@@ -58,33 +58,33 @@ covers individual newer features separately.
#### Operator Features
<!-- START(featuresCommunityTable) -->
| Feature | Operator Version | Introduced | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks |
|:-------------------------------------------------------------------------------------|:-----------------|:-----------|:-----------------|:----------------------|:-------------|:--------|:------------------------------------------------------|:-----------------------------------------------------------------------------------|
| Enforced ResignLeadership | 1.2.34 | 1.2.34 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.enforced-resign-leadership | Enforce ResignLeadership and ensure that Leaders are moved from restarted DBServer |
| Copy resources spec to init containers | 1.2.33 | 1.2.33 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.init-containers-copy-resources | Copy resources spec to built-in init containers if they are not specified |
| [Rebalancer V2](docs/design/features/rebalancer_v2.md) | 1.2.31 | 1.2.31 | >= 3.10.0 | Community, Enterprise | Alpha | False | --deployment.feature.rebalancer-v2 | N/A |
| [Secured containers](docs/design/features/secured_containers.md) | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.secured-containers | If set to True Operator will run containers in secure mode |
| Version Check V2 | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.upgrade-version-check-V2 | N/A |
| [Operator Ephemeral Volumes](docs/design/features/ephemeral_volumes.md) | 1.2.31 | 1.2.2 | >= 3.8.0 | Community, Enterprise | Beta | False | --deployment.feature.ephemeral-volumes | N/A |
| [Force Rebuild Out Synced Shards](docs/design/features/rebuild_out_synced_shards.md) | 1.2.27 | 1.2.27 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.force-rebuild-out-synced-shards | It should be used only if user is aware of the risks. |
| [Spec Default Restore](docs/design/features/deployment_spec_defaults.md) | 1.2.25 | 1.2.21 | >= 3.8.0 | Community, Enterprise | Beta | True | --deployment.feature.deployment-spec-defaults-restore | If set to False Operator will not change ArangoDeployment Spec |
| Version Check | 1.2.23 | 1.1.4 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.upgrade-version-check | N/A |
| [Failover Leader service](docs/design/features/failover_leader_service.md) | 1.2.13 | 1.2.13 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.failover-leadership | N/A |
| Graceful Restart | 1.2.5 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | ---deployment.feature.graceful-shutdown | N/A |
| Optional Graceful Restart | 1.2.0 | 1.2.5 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.optional-graceful-shutdown | N/A |
| Operator Internal Metrics Exporter | 1.2.0 | 1.2.0 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.metrics-exporter | N/A |
| Operator Maintenance Management Support | 1.2.0 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.maintenance | N/A |
| Encryption Key Rotation Support | 1.2.0 | 1.0.3 | >= 3.8.0 | Enterprise | NotSupported | False | --deployment.feature.encryption-rotation | N/A |
| TLS Runtime Rotation Support | 1.1.0 | 1.0.4 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-rotation | N/A |
| JWT Rotation Support | 1.1.0 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.jwt-rotation | N/A |
| Operator Single Mode | 1.0.4 | 1.0.4 | >= 3.8.0 | Community, Enterprise | Production | False | --mode.single | Only 1 instance of Operator allowed in namespace when feature is enabled |
| TLS SNI Support | 1.0.3 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-sni | N/A |
| Disabling of liveness probes | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Pod Disruption Budgets | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Prometheus Metrics Exporter | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | Prometheus required |
| Sidecar Containers | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Volume Claim Templates | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Volume Resizing | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Feature | Operator Version | Introduced | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks |
|:------------------------------------------------------------------------------|:-----------------|:-----------|:-----------------|:----------------------|:-------------|:--------|:------------------------------------------------------|:-----------------------------------------------------------------------------------|
| Enforced ResignLeadership | 1.2.34 | 1.2.34 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.enforced-resign-leadership | Enforce ResignLeadership and ensure that Leaders are moved from restarted DBServer |
| Copy resources spec to init containers | 1.2.33 | 1.2.33 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.init-containers-copy-resources | Copy resources spec to built-in init containers if they are not specified |
| [Rebalancer V2](docs/features/rebalancer_v2.md) | 1.2.31 | 1.2.31 | >= 3.10.0 | Community, Enterprise | Alpha | False | --deployment.feature.rebalancer-v2 | N/A |
| [Secured containers](docs/features/secured_containers.md) | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.secured-containers | If set to True Operator will run containers in secure mode |
| Version Check V2 | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.upgrade-version-check-V2 | N/A |
| [Operator Ephemeral Volumes](docs/features/ephemeral_volumes.md) | 1.2.31 | 1.2.2 | >= 3.8.0 | Community, Enterprise | Beta | False | --deployment.feature.ephemeral-volumes | N/A |
| [Force Rebuild Out Synced Shards](docs/features/rebuild_out_synced_shards.md) | 1.2.27 | 1.2.27 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.force-rebuild-out-synced-shards | It should be used only if user is aware of the risks. |
| [Spec Default Restore](docs/features/deployment_spec_defaults.md) | 1.2.25 | 1.2.21 | >= 3.8.0 | Community, Enterprise | Beta | True | --deployment.feature.deployment-spec-defaults-restore | If set to False Operator will not change ArangoDeployment Spec |
| Version Check | 1.2.23 | 1.1.4 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.upgrade-version-check | N/A |
| [Failover Leader service](docs/features/failover_leader_service.md) | 1.2.13 | 1.2.13 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.failover-leadership | N/A |
| Graceful Restart | 1.2.5 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.graceful-shutdown | N/A |
| Optional Graceful Restart | 1.2.0 | 1.2.5 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.optional-graceful-shutdown | N/A |
| Operator Internal Metrics Exporter | 1.2.0 | 1.2.0 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.metrics-exporter | N/A |
| Operator Maintenance Management Support | 1.2.0 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.maintenance | N/A |
| Encryption Key Rotation Support | 1.2.0 | 1.0.3 | >= 3.8.0 | Enterprise | NotSupported | False | --deployment.feature.encryption-rotation | N/A |
| TLS Runtime Rotation Support | 1.1.0 | 1.0.4 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-rotation | N/A |
| JWT Rotation Support | 1.1.0 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.jwt-rotation | N/A |
| Operator Single Mode | 1.0.4 | 1.0.4 | >= 3.8.0 | Community, Enterprise | Production | False | --mode.single | Only 1 instance of Operator allowed in namespace when feature is enabled |
| TLS SNI Support | 1.0.3 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-sni | N/A |
| Disabling of liveness probes | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Pod Disruption Budgets | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Prometheus Metrics Exporter | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | Prometheus required |
| Sidecar Containers | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Volume Claim Templates | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Volume Resizing | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
<!-- END(featuresCommunityTable) -->
@@ -97,7 +97,7 @@ To upgrade to the Enterprise Edition, you need to get in touch with the ArangoDB
|:-------------------------------------------------------|:-----------------|:-----------|:-----------------|:-----------------|:-----------|:--------|:-----|:----------------------------------------------------------------------------|
| AgencyCache | 1.2.30 | 1.2.30 | >= 3.8.0 | Enterprise | Production | True | N/A | Enable Agency Cache mechanism in the Operator (Increase limit of the nodes) |
| Member Maintenance Support | 1.2.25 | 1.2.16 | >= 3.8.0 | Enterprise | Production | True | N/A | Enable Member Maintenance during planned restarts |
| [Rebalancer](docs/design/features/rebalancer.md) | 1.2.15 | 1.2.5 | >= 3.8.0 | Enterprise | Production | True | N/A | N/A |
| [Rebalancer](docs/features/rebalancer.md) | 1.2.15 | 1.2.5 | >= 3.8.0 | Enterprise | Production | True | N/A | N/A |
| [TopologyAwareness](docs/design/topology_awareness.md) | 1.2.4 | 1.2.4 | >= 3.8.0 | Enterprise | Production | True | N/A | N/A |
<!-- END(featuresEnterpriseTable) -->

@@ -2,10 +2,10 @@
- [Tutorial](https://www.arangodb.com/docs/stable/tutorials-kubernetes.html)
- [Documentation](https://www.arangodb.com/docs/stable/deployment-kubernetes.html)
- [Design documents](./design/README.md)
- [Providers](./providers/README.md)
# ArangoDB Kubernetes Operator Generated Documentation
- [ArangoDB Operator Metrics & Alerts](./generated/metrics/README.md)
- [ArangoDB Actions](./generated/actions.md)
- [Architecture](./design/README.md)
- [Features description and usage](./features/README.md)
- [Custom Resources API Reference](./api/README.md)
- [Operator Metrics & Alerts](./generated/metrics/README.md)
- [Operator Actions](./generated/actions.md)
- Known issues (TBD)
- [How-to ...](how-to/README.md)

@@ -597,7 +597,7 @@ Architecture defines the list of supported architectures.
First element on the list is marked as default architecture.
Links:
* [Architecture Change](/docs/design/arch_change.md)
* [Architecture Change](/docs/how-to/arch_change.md)
Default Value: ['amd64']

@@ -4,7 +4,11 @@
### .spec.deletion_priority: int
[Code Reference](/pkg/apis/deployment/v1/arango_member_spec.go#L44)
DeletionPriority define Deletion Priority.
Higher value means higher priority. Default is 0.
Example: set 1 for Coordinator which should be deleted first and scale down coordinators by one.
[Code Reference](/pkg/apis/deployment/v1/arango_member_spec.go#L47)
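For illustration only, a sketch of raising this priority on an existing member via `kubectl patch` (the member name is a hypothetical placeholder, and this assumes the field is writable through a standard merge patch):
```bash
# Hypothetical example: mark one Coordinator member for earlier deletion during
# scale-down. List member resources with `kubectl get arangomembers` first.
kubectl patch arangomember example-simple-cluster-crdn-abc123 \
  --type merge -p '{"spec":{"deletion_priority":1}}'
```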
### .spec.deploymentUID: string

docs/api/README.md (new file)
@@ -0,0 +1,5 @@
# Custom Resources API Reference
- [ArangoDeployment.V1](./ArangoDeployment.V1.md)
- [ArangoMember.V1](./ArangoMember.V1.md)

@@ -1,524 +0,0 @@
# ArangoDB on bare metal Kubernetes
A word of warning upfront: Kubernetes is awesome and powerful. As with awesome
and powerful things, there are infinite ways of setting up a k8s cluster, and with
great flexibility comes great complexity; there are just as many ways of hitting
barriers. This guide is a walk-through of a reasonable and flexible setup for
getting an ArangoDB cluster running on bare-metal Kubernetes.
## BEWARE: Do not use this setup for production!
This guide does not involve setting up dedicated master nodes or high
availability for Kubernetes but, for the sake of simplicity, uses a single
untainted master. This is the very definition of a test environment.
If you are interested in running a high available Kubernetes setup, please
refer to: [Creating Highly Available Clusters with kubeadm](https://kubernetes.io/docs/setup/independent/high-availability/)
## Requirements
Let there be 3 Linux boxes, `kube01 (192.168.10.61)`, `kube02 (192.168.10.62)`
and `kube03 (192.168.10.3)`, with `kubeadm` and `kubectl` installed:
* `kubeadm`, `kubectl` version `>=1.10`
## Initialize the master node
The master node is special in that it runs the API server and some other
vital control-plane infrastructure:
```
sudo kubeadm init --pod-network-cidr=10.244.0.0/16
```
```
[init] Using Kubernetes version: v1.13.2
[preflight] Running pre-flight checks
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Activating the kubelet service
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [kube01 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.10.61]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [kube01 localhost] and IPs [192.168.10.61 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [kube01 localhost] and IPs [192.168.10.61 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 23.512869 seconds
[uploadconfig] storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.13" in namespace kube-system with the configuration for the kubelets in the cluster
[patchnode] Uploading the CRI Socket information "/var/run/dockershim.sock" to the Node API object "kube01" as an annotation
[mark-control-plane] Marking the node kube01 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node kube01 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: blcr1y.49wloegyaugice8a
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstraptoken] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstraptoken] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstraptoken] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstraptoken] creating the "cluster-info" ConfigMap in the "kube-public" namespace
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes master has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of machines by running the following on each node as root:
kubeadm join 192.168.10.61:6443 --token blcr1y.49wloegyaugice8a --discovery-token-ca-cert-hash sha256:0505933664d28054a62298c68dc91e9b2b5cf01ecfa2228f3c8fa2412b7a78c8
```
Go ahead and follow the instructions above to get `kubectl` working on the master:
```
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
```
## Deploy a pod network
For this guide, we go with **flannel**, as it is an easy way of setting up a
layer 3 network that uses the Kubernetes API and just works anywhere a
network between the involved machines exists:
```
kubectl apply -f \
https://raw.githubusercontent.com/coreos/flannel/bc79dd1505b0c8681ece4de4c0d86c5cd2643275/Documentation/kube-flannel.yml
```
```
clusterrole.rbac.authorization.k8s.io/flannel created
clusterrolebinding.rbac.authorization.k8s.io/flannel created
serviceaccount/flannel created
configmap/kube-flannel-cfg created
daemonset.extensions/kube-flannel-ds-amd64 created
daemonset.extensions/kube-flannel-ds-arm64 created
daemonset.extensions/kube-flannel-ds-arm created
daemonset.extensions/kube-flannel-ds-ppc64le created
daemonset.extensions/kube-flannel-ds-s390x created
```
## Join remaining nodes
Run the above join commands on the nodes `kube02` and `kube03`. Below is the
output on `kube02` for the setup for this guide:
```
sudo kubeadm join 192.168.10.61:6443 --token blcr1y.49wloegyaugice8a --discovery-token-ca-cert-hash sha256:0505933664d28054a62298c68dc91e9b2b5cf01ecfa2228f3c8fa2412b7a78c8
```
```
[preflight] Running pre-flight checks
[discovery] Trying to connect to API Server "192.168.10.61:6443"
[discovery] Created cluster-info discovery client, requesting info from "https://192.168.10.61:6443"
[discovery] Requesting info from "https://192.168.10.61:6443" again to validate TLS against the pinned public key
[discovery] Cluster info signature and contents are valid and TLS certificate validates against pinned roots, will use API Server "192.168.10.61:6443"
[discovery] Successfully established connection with API Server "192.168.10.61:6443"
[join] Reading configuration from the cluster...
[join] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[kubelet] Downloading configuration for the kubelet from the "kubelet-config-1.13" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Activating the kubelet service
[tlsbootstrap] Waiting for the kubelet to perform the TLS Bootstrap...
[patchnode] Uploading the CRI Socket information "/var/run/dockershim.sock" to the Node API object "kube02" as an annotation
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the master to see this node join the cluster.
```
## Untaint master node
```
kubectl taint nodes --all node-role.kubernetes.io/master-
```
```
node/kube01 untainted
taint "node-role.kubernetes.io/master:" not found
taint "node-role.kubernetes.io/master:" not found
```
## Wait for nodes to get ready and run a sanity check
After a brief period, you should see that your nodes are good to go:
```
kubectl get nodes
```
```
NAME STATUS ROLES AGE VERSION
kube01 Ready master 38m v1.13.2
kube02 Ready <none> 13m v1.13.2
kube03 Ready <none> 63s v1.13.2
```
Just a quick sanity check to see that your cluster is up and running:
```
kubectl get all --all-namespaces
```
```
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system pod/coredns-86c58d9df4-r9l5c 1/1 Running 2 41m
kube-system pod/coredns-86c58d9df4-swzpx 1/1 Running 2 41m
kube-system pod/etcd-kube01 1/1 Running 2 40m
kube-system pod/kube-apiserver-kube01 1/1 Running 2 40m
kube-system pod/kube-controller-manager-kube01 1/1 Running 2 40m
kube-system pod/kube-flannel-ds-amd64-hppt4 1/1 Running 3 16m
kube-system pod/kube-flannel-ds-amd64-kt6jh 1/1 Running 1 3m41s
kube-system pod/kube-flannel-ds-amd64-tg7gz 1/1 Running 2 20m
kube-system pod/kube-proxy-f2g2q 1/1 Running 2 41m
kube-system pod/kube-proxy-gt9hh 1/1 Running 0 3m41s
kube-system pod/kube-proxy-jwmq7 1/1 Running 2 16m
kube-system pod/kube-scheduler-kube01 1/1 Running 2 40m
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
default service/kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 41m
kube-system service/kube-dns ClusterIP 10.96.0.10 <none> 53/UDP,53/TCP 41m
```
## Deploy helm
- Obtain current [helm release](https://github.com/helm/helm/releases) for your architecture
- Create tiller user
```
kubectl create serviceaccount --namespace kube-system tiller
```
```
serviceaccount/tiller created
```
- Attach `tiller` to proper role
```
kubectl create clusterrolebinding tiller-cluster-rule \
--clusterrole=cluster-admin --serviceaccount=kube-system:tiller
```
```
clusterrolebinding.rbac.authorization.k8s.io/tiller-cluster-rule created
```
- Initialise helm
```
helm init --service-account tiller
```
```
$HELM_HOME has been configured at /home/xxx/.helm.
...
Happy Helming!
Tiller (the Helm server-side component) has been
installed into your Kubernetes Cluster.
```
## Deploy ArangoDB operator charts
- Deploy ArangoDB custom resource definition chart
```
helm install https://github.com/arangodb/kube-arangodb/releases/download/0.3.7/kube-arangodb-crd.tgz
```
```
NAME: hoping-gorilla
LAST DEPLOYED: Mon Jan 14 06:10:27 2019
NAMESPACE: default
STATUS: DEPLOYED
RESOURCES:
==> v1beta1/CustomResourceDefinition
NAME AGE
arangodeployments.database.arangodb.com 0s
arangodeploymentreplications.replication.database.arangodb.com 0s
NOTES:
kube-arangodb-crd has been deployed successfully!
Your release is named 'hoping-gorilla'.
You can now continue install kube-arangodb chart.
```
- Deploy ArangoDB operator chart
```
helm install https://github.com/arangodb/kube-arangodb/releases/download/0.3.7/kube-arangodb.tgz
```
```
NAME: illocutionary-whippet
LAST DEPLOYED: Mon Jan 14 06:11:58 2019
NAMESPACE: default
STATUS: DEPLOYED
RESOURCES:
==> v1beta1/ClusterRole
NAME AGE
illocutionary-whippet-deployment-replications 0s
illocutionary-whippet-deployment-replication-operator 0s
illocutionary-whippet-deployments 0s
illocutionary-whippet-deployment-operator 0s
==> v1beta1/ClusterRoleBinding
NAME AGE
illocutionary-whippet-deployment-replication-operator-default 0s
illocutionary-whippet-deployment-operator-default 0s
==> v1beta1/RoleBinding
NAME AGE
illocutionary-whippet-deployment-replications 0s
illocutionary-whippet-deployments 0s
==> v1/Service
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
arango-deployment-replication-operator ClusterIP 10.107.2.133 <none> 8528/TCP 0s
arango-deployment-operator ClusterIP 10.104.189.81 <none> 8528/TCP 0s
==> v1beta1/Deployment
NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE
arango-deployment-replication-operator 2 2 2 0 0s
arango-deployment-operator 2 2 2 0 0s
==> v1/Pod(related)
NAME READY STATUS RESTARTS AGE
arango-deployment-replication-operator-5f679fbfd8-nk8kz 0/1 Pending 0 0s
arango-deployment-replication-operator-5f679fbfd8-pbxdl 0/1 ContainerCreating 0 0s
arango-deployment-operator-65f969fc84-gjgl9 0/1 Pending 0 0s
arango-deployment-operator-65f969fc84-wg4nf 0/1 ContainerCreating 0 0s
NOTES:
kube-arangodb has been deployed successfully!
Your release is named 'illocutionary-whippet'.
You can now deploy ArangoDeployment & ArangoDeploymentReplication resources.
See https://www.arangodb.com/docs/stable/tutorials-kubernetes.html
for how to get started.
```
- Unlike cloud k8s offerings, a bare-metal setup provides no volume infrastructure
out of the box, so we still need to deploy the storage operator chart:
```
helm install \
https://github.com/arangodb/kube-arangodb/releases/download/0.3.7/kube-arangodb-storage.tgz
```
```
NAME: sad-newt
LAST DEPLOYED: Mon Jan 14 06:14:15 2019
NAMESPACE: default
STATUS: DEPLOYED
RESOURCES:
==> v1/ServiceAccount
NAME SECRETS AGE
arango-storage-operator 1 1s
==> v1beta1/CustomResourceDefinition
NAME AGE
arangolocalstorages.storage.arangodb.com 1s
==> v1beta1/ClusterRole
NAME AGE
sad-newt-storages 1s
sad-newt-storage-operator 1s
==> v1beta1/ClusterRoleBinding
NAME AGE
sad-newt-storage-operator 1s
==> v1beta1/RoleBinding
NAME AGE
sad-newt-storages 1s
==> v1/Service
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
arango-storage-operator ClusterIP 10.104.172.100 <none> 8528/TCP 1s
==> v1beta1/Deployment
NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE
arango-storage-operator 2 2 2 0 1s
==> v1/Pod(related)
NAME READY STATUS RESTARTS AGE
arango-storage-operator-6bc64ccdfb-tzllq 0/1 ContainerCreating 0 0s
arango-storage-operator-6bc64ccdfb-zdlxk 0/1 Pending 0 0s
NOTES:
kube-arangodb-storage has been deployed successfully!
Your release is named 'sad-newt'.
You can now deploy an ArangoLocalStorage resource.
See https://www.arangodb.com/docs/stable/deployment-kubernetes-storage-resource.html
for further instructions.
```
## Deploy ArangoDB cluster
- Deploy local storage
```
kubectl apply -f https://raw.githubusercontent.com/arangodb/kube-arangodb/master/examples/arango-local-storage.yaml
```
```
arangolocalstorage.storage.arangodb.com/arangodb-local-storage created
```
- Deploy simple cluster
```
kubectl apply -f https://raw.githubusercontent.com/arangodb/kube-arangodb/master/examples/simple-cluster.yaml
```
```
arangodeployment.database.arangodb.com/example-simple-cluster created
```
## Access your cluster
- Find your cluster's network address:
```
kubectl get services
```
```
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
arango-deployment-operator ClusterIP 10.104.189.81 <none> 8528/TCP 14m
arango-deployment-replication-operator ClusterIP 10.107.2.133 <none> 8528/TCP 14m
example-simple-cluster ClusterIP 10.109.170.64 <none> 8529/TCP 5m18s
example-simple-cluster-ea NodePort 10.98.198.7 <none> 8529:30551/TCP 4m8s
example-simple-cluster-int ClusterIP None <none> 8529/TCP 5m19s
kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 69m
```
- In this case, according to the access service, `example-simple-cluster-ea`,
the cluster's coordinators are reachable here:
https://kube01:30551, https://kube02:30551 and https://kube03:30551
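For a quick connectivity check against one of those endpoints, something along these lines works (a sketch: `-k` skips verification of the self-signed certificate, and the credentials depend on how authentication is configured for your deployment):
```bash
# Query the ArangoDB version endpoint through the NodePort shown above.
curl -k -u root:<password> https://kube01:30551/_api/version
```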
## LoadBalancing
For this guide we like to use the `metallb` load balancer, which can be easily
installed as a simple layer 2 load balancer:
- Install the `metallb` controller:
```
kubectl apply -f \
https://raw.githubusercontent.com/google/metallb/v0.7.3/manifests/metallb.yaml
```
```
namespace/metallb-system created
serviceaccount/controller created
serviceaccount/speaker created
clusterrole.rbac.authorization.k8s.io/metallb-system:controller created
clusterrole.rbac.authorization.k8s.io/metallb-system:speaker created
role.rbac.authorization.k8s.io/config-watcher created
clusterrolebinding.rbac.authorization.k8s.io/metallb-system:controller created
clusterrolebinding.rbac.authorization.k8s.io/metallb-system:speaker created
rolebinding.rbac.authorization.k8s.io/config-watcher created
daemonset.apps/speaker created
deployment.apps/controller created
```
- Deploy the network range configuration. Assuming that the IP address range
granted to `metallb` for load balancing is 192.168.10.224/28,
download the [example layer2 configuration](https://raw.githubusercontent.com/google/metallb/v0.7.3/manifests/example-layer2-config.yaml).
```
wget https://raw.githubusercontent.com/google/metallb/v0.7.3/manifests/example-layer2-config.yaml
```
- Edit the `example-layer2-config.yaml` file to use the appropriate addresses.
Do this with great care, as YAML files are indentation-sensitive.
```
apiVersion: v1
kind: ConfigMap
metadata:
namespace: metallb-system
name: config
data:
config: |
address-pools:
- name: my-ip-space
protocol: layer2
addresses:
- 192.168.10.224/28
```
- Deploy the configuration map:
```
kubectl apply -f example-layer2-config.yaml
```
```
configmap/config created
```
- Restart ArangoDB's endpoint access service:
```
kubectl delete service example-simple-cluster-ea
```
```
service "example-simple-cluster-ea" deleted
```
- Watch how the service changes from `NodePort` to `LoadBalancer` compared to the output above:
```
kubectl get services
```
```
NAME                                     TYPE           CLUSTER-IP      EXTERNAL-IP      PORT(S)          AGE
arango-deployment-operator ClusterIP 10.104.189.81 <none> 8528/TCP 34m
arango-deployment-replication-operator ClusterIP 10.107.2.133 <none> 8528/TCP 34m
example-simple-cluster ClusterIP 10.109.170.64 <none> 8529/TCP 24m
example-simple-cluster-ea LoadBalancer 10.97.217.222 192.168.10.224 8529:30292/TCP 22s
example-simple-cluster-int ClusterIP None <none> 8529/TCP 24m
kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 89m
```
- Now you are able to access all 3 coordinators through https://192.168.10.224:8529
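To verify the load-balanced endpoint from outside the cluster, a check along these lines can help (a sketch; credentials as configured for your deployment):
```bash
# Confirm the external IP assigned by metallb, then hit the version endpoint.
kubectl get service example-simple-cluster-ea
curl -k -u root:<password> https://192.168.10.224:8529/_api/version
```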

@@ -1,11 +0,0 @@
# Customer questions
- What is your experience with using Kubernetes?
- What is your experience with using ArangoDB on Kubernetes?
- What do you think of the operator concept for an ArangoDB Kubernetes offering?
- What is the minimum version of Kubernetes you're running / need?
- What kind of persistent volumes do you use / plan to use?
- What kind of load-balancer support do you use / need for ArangoDB in Kubernetes?
- Do you have a need to limit ArangoDB Pods to a sub-section of your Kubernetes cluster?
- Do you see a need to shutdown a cluster and bring it back alive later (with its data!)?
- In which cloud/on premises environment are you going to use Kubernetes (AWS, GCE, on premise...)?

@@ -1,30 +1,13 @@
# ArangoDB operator design documents
# ArangoDB operator architecture details
- [Architecture change](./arch_change.md)
- [Constraints](./constraints.md)
- [Health](./health.md)
- [Metrics](./metrics.md)
- [Kubernetes Pod name versus cluster ID](./pod_name_versus_cluster_id.md)
- [Resource & labels](./resource_and_labels.md)
- [Resource Management](./resource_management.md)
- [Scaling](./scaling.md)
- [Status](./status.md)
- [Upgrading](./upgrading.md)
- [Rotating Pods](./rotating.md)
- [Maintenance](./maintenance.md)
- [Additional configuration](./additional_configuration.md)
- [Topology awareness](./topology_awareness.md)
- [Configuring timezone](./configuring_tz.md)
- [Operator API](./api.md)
- [Logging](./logging.md)
- [Manual Recovery](./recovery.md)
- [Backup](./backup.md)
## Features
- [Force rebuild out-synced Shards with broken Merkle Tree](./features/rebuild_out_synced_shards.md)
- [Failover Leader service](./features/failover_leader_service.md)
- [Restore defaults from last accepted state of deployment](./features/deployment_spec_defaults.md)
## Debugging
- [Collecting debug info](./debugging.md)
- [Backups](./backup.md)
- [Constraints for high-availability](./constraints.md)
- [ArangoDB Exporter](./exporter.md)
- [Health](./health.md)
- [Lifecycle hooks and Finalizers](./lifecycle_hooks_and_finalizers.md)
- [Pod eviction and replacement](./pod_eviction_and_replacement.md)
- [Kubernetes Pod name versus cluster ID](./pod_name_versus_cluster_id.md)
- [Resources & labels](./resources_and_labels.md)
- [Scaling](./scaling.md)
- [Topology awareness](./topology_awareness.md)

@@ -1,533 +0,0 @@
# Acceptance test for kube-arangodb operator on specific Kubernetes platform
This acceptance test plan describes all test scenarios that must be executed
successfully in order to consider the kube-arangodb operator production-ready
on a specific Kubernetes setup (from now on, a Kubernetes setup is called a platform).
## Platform parameters
Before the test, record the following parameters for the platform the test is executed on.
- Name of the platform
- Version of the platform
- Upstream Kubernetes version used by the platform (run `kubectl version`)
- Number of nodes used by the Kubernetes cluster (run `kubectl get node`)
- `StorageClasses` provided by the platform (run `kubectl get storageclass`)
- Does the platform use RBAC? (run `kubectl describe clusterrolebinding`)
- Does the platform support services of type `LoadBalancer`?
If one of the above questions can have multiple answers (e.g. different Kubernetes versions)
then make the platform more specific. E.g. consider "GKE with Kubernetes 1.10.2" a platform
instead of "GKE" which can have version "1.8", "1.9" & "1.10.2".
## Platform preparations
Before the tests can be run, the platform has to be prepared.
### Deploy the ArangoDB operators
Deploy the following ArangoDB operators:
- `ArangoDeployment` operator
- `ArangoDeploymentReplication` operator
- `ArangoLocalStorage` operator
To do so, follow the [instructions in the documentation](https://www.arangodb.com/docs/stable/deployment-kubernetes-usage.html).
### `PersistentVolume` provider
If the platform does not provide a `PersistentVolume` provider, create one by running:
```bash
kubectl apply -f examples/arango-local-storage.yaml
```
## Basis tests
The basis tests are executed on every platform with various images:
Run the following tests with the following images:
- Community <Version>
- Enterprise <Version>
For every test, one of these images can be chosen, as long as each image
is used in a test at least once.
### Test 1a: Create single server deployment
Create an `ArangoDeployment` of mode `Single`.
Hint: Use `tests/acceptance/single.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 1 `Pod`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
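If the hinted YAML file is not at hand, a minimal manifest along these lines can be used instead (a sketch: the apiVersion follows the example shown later in this plan, and the deployment name is arbitrary):
```bash
# Minimal single-server ArangoDeployment (sketch). Adjust apiVersion to the
# CRD version installed by your operator release.
kubectl apply -f - <<EOF
apiVersion: "database.arangodb.com/v1alpha"
kind: "ArangoDeployment"
metadata:
  name: "single-test"
spec:
  mode: Single
EOF
```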
### Test 1b: Create active failover deployment
Create an `ArangoDeployment` of mode `ActiveFailover`.
Hint: Use `tests/acceptance/activefailover.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 5 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
### Test 1c: Create cluster deployment
Create an `ArangoDeployment` of mode `Cluster`.
Hint: Use `tests/acceptance/cluster.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 9 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
### Test 1d: Create cluster deployment with dc2dc
This test requires the use of the enterprise image.
Create an `ArangoDeployment` of mode `Cluster` and dc2dc enabled.
Hint: Derive from `tests/acceptance/cluster-sync.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 15 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The deployment must yield a `Service` named `<deployment-name>-sync`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
### Test 2a: Scale an active failover deployment
Create an `ArangoDeployment` of mode `ActiveFailover`.
- [ ] The deployment must start
- [ ] The deployment must yield 5 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Change the value of `spec.single.count` from 2 to 3.
- [ ] A single server is added
- [ ] The deployment must yield 6 `Pods`
Change the value of `spec.single.count` from 3 to 2.
- [ ] A single server is removed
- [ ] The deployment must yield 5 `Pods`
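One way to make the count changes described above without editing a file is a merge patch (a sketch; the deployment name is a placeholder, and `kubectl edit arangodeployment <name>` works just as well):
```bash
# Scale the single-server group of the ActiveFailover deployment from 2 to 3.
kubectl patch arangodeployment example-activefailover \
  --type merge -p '{"spec":{"single":{"count":3}}}'
```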
### Test 2b: Scale a cluster deployment
Create an `ArangoDeployment` of mode `Cluster`.
Hint: Use `tests/acceptance/cluster.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 9 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Change the value of `spec.dbservers.count` from 3 to 5.
- [ ] Two dbservers are added
- [ ] The deployment must yield 11 `Pods`
Change the value of `spec.coordinators.count` from 3 to 4.
- [ ] A coordinator is added
- [ ] The deployment must yield 12 `Pods`
Change the value of `spec.dbservers.count` from 5 to 2.
- [ ] Three dbservers are removed (one by one)
- [ ] The deployment must yield 9 `Pods`
Change the value of `spec.coordinators.count` from 4 to 1.
- [ ] Three coordinators are removed (one by one)
- [ ] The deployment must yield 6 `Pods`
### Test 3: Production environment
Production environment tests are only relevant if there are enough nodes
available that `Pods` can be scheduled on.
The number of available nodes must be >= the maximum server count in
any group.
### Test 3a: Create single server deployment in production environment
Create an `ArangoDeployment` of mode `Single` with an environment of `Production`.
Hint: Derive from `tests/acceptance/single.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 1 `Pod`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
### Test 3b: Create active failover deployment in production environment
Create an `ArangoDeployment` of mode `ActiveFailover` with an environment of `Production`.
Hint: Derive from `tests/acceptance/activefailover.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 5 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
### Test 3c: Create cluster deployment in production environment
Create an `ArangoDeployment` of mode `Cluster` with an environment of `Production`.
Hint: Derive from `tests/acceptance/cluster.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 9 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
### Test 3d: Create cluster deployment in production environment and scale it
Create an `ArangoDeployment` of mode `Cluster` with an environment of `Production`.
Hint: Derive from `tests/acceptance/cluster.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 9 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Change the value of `spec.dbservers.count` from 3 to 4.
- [ ] Two dbservers are added
- [ ] The deployment must yield 10 `Pods`
Change the value of `spec.coordinators.count` from 3 to 4.
- [ ] A coordinator is added
- [ ] The deployment must yield 11 `Pods`
Change the value of `spec.dbservers.count` from 4 to 2.
- [ ] Three dbservers are removed (one by one)
- [ ] The deployment must yield 9 `Pods`
Change the value of `spec.coordinators.count` from 4 to 2.
- [ ] Three coordinators are removed (one by one)
- [ ] The deployment must yield 7 `Pods`
### Test 4a: Create cluster deployment with `ArangoLocalStorage` provided volumes
Ensure an `ArangoLocalStorage` is deployed.
Hint: Use `tests/acceptance/local-storage.yaml`.
Create an `ArangoDeployment` of mode `Cluster` with a `StorageClass` that is
mapped to an `ArangoLocalStorage` provider.
Hint: Derive from `tests/acceptance/cluster.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 9 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
### Test 4b: Create cluster deployment with a platform provided `StorageClass`
This test only applies to platforms that provide their own `StorageClasses`.
Create an `ArangoDeployment` of mode `Cluster` with a `StorageClass` that is
provided by the platform.
Hint: Derive from `tests/acceptance/cluster.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 9 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
### Test 5a: Test `Pod` resilience on single servers
Create an `ArangoDeployment` of mode `Single`.
Hint: Use `tests/acceptance/single.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 1 `Pod`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Delete the `Pod` of the deployment that contains the single server.
- [ ] The `Pod` must be restarted
- [ ] After the `Pod` has restarted, the server must have the same data and be responsive again
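A sketch of deleting that `Pod` (the label selector is an assumption based on the operator's usual pod labels; verify with `kubectl get pods --show-labels`, and the names are placeholders):
```bash
# Find the single-server pod of the deployment, then delete it.
kubectl get pods -l arango_deployment=single-test
kubectl delete pod <single-server-pod-name>
```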
### Test 5b: Test `Pod` resilience on active failover
Create an `ArangoDeployment` of mode `ActiveFailover`.
Hint: Use `tests/acceptance/activefailover.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 5 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Delete a `Pod` of the deployment that contains an agent.
- [ ] While the `Pod` is gone & restarted, the cluster must still respond to requests (R/W)
- [ ] The `Pod` must be restarted
Delete a `Pod` of the deployment that contains a single server.
- [ ] While the `Pod` is gone & restarted, the cluster must still respond to requests (R/W)
- [ ] The `Pod` must be restarted
### Test 5c: Test `Pod` resilience on clusters
Create an `ArangoDeployment` of mode `Cluster`.
Hint: Use `tests/acceptance/cluster.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 9 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Delete a `Pod` of the deployment that contains an agent.
- [ ] While the `Pod` is gone & restarted, the cluster must still respond to requests (R/W)
- [ ] The `Pod` must be restarted
Delete a `Pod` of the deployment that contains a dbserver.
- [ ] While the `Pod` is gone & restarted, the cluster must still respond to requests (R/W), except
for requests to collections with a replication factor of 1.
- [ ] The `Pod` must be restarted
Delete a `Pod` of the deployment that contains a coordinator.
- [ ] While the `Pod` is gone & restarted, the cluster must still respond to requests (R/W), except
requests targeting the restarting coordinator.
- [ ] The `Pod` must be restarted
### Test 6a: Test `Node` reboot on single servers
Create an `ArangoDeployment` of mode `Single`.
Hint: Use `tests/acceptance/single.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 1 `Pod`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Reboot the `Node` of the deployment that contains the single server.
- [ ] The `Pod` running on the `Node` must be restarted
- [ ] After the `Pod` has restarted, the server must have the same data and be responsive again
### Test 6b: Test `Node` reboot on active failover
Create an `ArangoDeployment` of mode `ActiveFailover` with an environment of `Production`.
Hint: Use `tests/acceptance/activefailover.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 5 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Reboot a `Node`.
- [ ] While the `Node` is restarting, the cluster must still respond to requests (R/W)
- [ ] All `Pods` on the `Node` must be restarted
### Test 6c: Test `Node` reboot on clusters
Create an `ArangoDeployment` of mode `Cluster` with an environment of `Production`.
Hint: Use `tests/acceptance/cluster.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 9 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Reboot a `Node`.
- [ ] While the `Node` is restarting, the cluster must still respond to requests (R/W)
- [ ] All `Pods` on the `Node` must be restarted
### Test 6d: Test `Node` removal on single servers
This test is only valid when `StorageClass` is used that provides network attached `PersistentVolumes`.
Create an `ArangoDeployment` of mode `Single`.
Hint: Use `tests/acceptance/single.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 1 `Pod`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Remove the `Node` containing the deployment from the Kubernetes cluster.
- [ ] The `Pod` running on the `Node` must be restarted on another `Node`
- [ ] After the `Pod` has restarted, the server must have the same data and be responsive again
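A sketch of removing a node cleanly (the node name is a placeholder; the flag for deleting local data differs between kubectl versions):
```bash
# Drain the node so its pods are evicted, then remove it from the cluster.
kubectl drain <node-name> --ignore-daemonsets --delete-local-data
kubectl delete node <node-name>
```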
### Test 6e: Test `Node` removal on active failover
Create an `ArangoDeployment` of mode `ActiveFailover` with an environment of `Production`.
Hint: Use `tests/acceptance/activefailover.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 5 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Remove a `Node` containing the `Pods` of the deployment from the Kubernetes cluster.
- [ ] While the `Pods` are being restarted on new `Nodes`, the cluster must still respond to requests (R/W)
- [ ] The `Pods` running on the `Node` must be restarted on another `Node`
- [ ] After the `Pods` have restarted, the server must have the same data and be responsive again
### Test 6f: Test `Node` removal on clusters
This test is only valid when:
- A `StorageClass` is used that provides network attached `PersistentVolumes`
- or all collections have a replication factor of 2 or higher
Create an `ArangoDeployment` of mode `Cluster` with an environment of `Production`.
Hint: Use `tests/acceptance/cluster.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 9 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Remove a `Node` containing the `Pods` of the deployment from the Kubernetes cluster.
- [ ] While the `Pods` are being restarted on new `Nodes`, the cluster must still respond to requests (R/W)
- [ ] The `Pods` running on the `Node` must be restarted on another `Node`
- [ ] After the `Pods` have restarted, the server must have the same data and be responsive again
### Test 6g: Test `Node` removal on clusters with replication factor 1
This test is only valid when:
- A `StorageClass` is used that provides `Node` local `PersistentVolumes`
- and at least some collections have a replication factor of 1
Create an `ArangoDeployment` of mode `Cluster` with an environment of `Production`.
Hint: Use `tests/acceptance/cluster.yaml`.
- [ ] The deployment must start
- [ ] The deployment must yield 9 `Pods`
- [ ] The deployment must yield a `Service` named `<deployment-name>`
- [ ] The deployment must yield a `Service` named `<deployment-name>-ea`
- [ ] The `Service` named `<deployment-name>-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Remove a `Node`, containing the dbserver `Pod` that holds a collection with replication factor 1,
from the Kubernetes cluster.
- [ ] While the `Pods` are being restarted on new `Nodes`, the cluster must still respond to requests (R/W),
except requests involving collections with a replication factor of 1
- [ ] The `Pod` running the dbserver with a collection that has a replication factor of 1 must NOT be restarted on another `Node`
Remove the collections with the replication factor of 1
- [ ] The remaining `Pods` running on the `Node` must be restarted on another `Node`
- [ ] After the `Pods` have restarted, the server must have the same data, except for the removed collections, and be responsive again
### Test 7a: Test DC2DC on 2 clusters, running in the same Kubernetes cluster
This test requires the use of the enterprise image.
Create 2 `ArangoDeployment` of mode `Cluster` and dc2dc enabled.
Hint: Derive from `tests/acceptance/cluster-sync.yaml`, name the deployments `cluster1` and `cluster2`.
Make sure to include a name (`cluster1-to-2`) for an external access package.
```yaml
apiVersion: "database.arangodb.com/v1alpha"
kind: "ArangoDeployment"
metadata:
name: "cluster1"
spec:
mode: Cluster
image: ewoutp/arangodb:3.3.14
sync:
enabled: true
externalAccess:
accessPackageSecretNames: ["cluster1-to-2"]
```
- [ ] The deployments must start
- [ ] The deployments must yield 15 `Pods`
- [ ] The deployments must yield a `Service` named `cluster[1|2]`
- [ ] The deployments must yield a `Service` named `cluster[1|2]-ea`
- [ ] The deployments must yield a `Service` named `cluster[1|2]-sync`
- [ ] The `Services` named `cluster[1|2]-ea` must be accessible from outside (LoadBalancer or NodePort) and show WebUI
Create an `ArangoDeploymentReplication` from `tests/acceptance/cluster12-replication.yaml`.
It will take some time until the synchronization (from `cluster1` to `cluster2`) is configured.
- [ ] The status of the `cluster12-replication` resource shows ....
- [ ] The webUI of `cluster1` shows that you can create a new collection there.
- [ ] The webUI of `cluster2` shows that you cannot create a new collection there.
Create a collection named `testcol` with a replication factor 2 and 3 shards (using the webUI of `cluster1`).
- [ ] The webUI of `cluster2` shows collection `testcol` with the given replication factor and number of shards.
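The same collection can also be created from `arangosh` instead of the web UI (a sketch; the endpoint and credentials are placeholders for however `cluster1`'s access service is exposed):
```bash
# Create a collection with replication factor 2 and 3 shards on cluster1.
arangosh --server.endpoint ssl://<cluster1-endpoint>:8529 \
  --server.username root \
  --javascript.execute-string \
  'db._create("testcol", {replicationFactor: 2, numberOfShards: 3});'
```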
Create multiple documents in the collection named `testcol` (using the webUI of `cluster1`).
- [ ] The documents are visible in webUI of `cluster2`.
Modify multiple documents in the collection named `testcol` (using the webUI of `cluster1`).
- [ ] The modified documents are visible in webUI of `cluster2`.
Remove one or more documents from the collection named `testcol` (using the webUI of `cluster1`).
- [ ] The documents are no longer visible in webUI of `cluster2`.
Create a new database called `db2` (using the webUI of `cluster1`).
- [ ] The webUI of `cluster2` shows database `db2`.

@@ -1,13 +0,0 @@
# Acceptance test platforms
The [kube-arangodb acceptance tests](./acceptance_test.md) must be
executed on the following platforms:
- Google GKE, with Kubernetes version 1.10
- Amazon EKS, with Kubernetes version 1.10
- Amazon & Kops, with Kubernetes version 1.10
- Azure AKS, with Kubernetes version 1.10
- Openshift, based on Kubernetes version 1.10
- Bare metal with kubeadm 1.10
- Minikube with Kubernetes version 1.10
- Kubernetes on docker for Mac, with Kubernetes version 1.10

@@ -1,64 +1,3 @@
# Deployment Operator Dashboard
To inspect the state of an `ArangoDeployment` you can use `kubectl get ...` to check
the `status` of the resource itself, but to get the entire picture you also
must inspect the status of the `Pods` created for the deployment, the `PersistentVolumeClaims`,
the `PersistentVolumes`, the `Services` and some `Secrets`.
The goal of the operator dashboard is to simplify this inspection process.
The deployment operator dashboard provides:
- A status overview of all `ArangoDeployments` it controls
- A status overview of all resources created by the operator (for an `ArangoDeployment`)
- Run the arangoinspector on deployments
- Instructions for upgrading deployments to newer versions
It does not provide:
- Direct access to the deployed database
- Anything that can already be done in the web-UI of the database or naturally belongs there.
The dashboard is a single-page web application that is served by the operator itself.
## Design decisions
### Leader only
Since only the operator instance that won the leader election has the latest state of all
deployments, only that instance will serve dashboard requests.
For this purpose, a `Service` is created when deploying the operator.
This service uses a `role=leader` selector to ensure that only the right instance
will be included in its list of endpoints.
### Exposing the dashboard
By default the `Service` that selects the leading operator instance is not exposed outside the Kubernetes cluster.
Users must use `kubectl expose service ...` to add additional `Services` of type `LoadBalancer`
or `NodePort` to expose the dashboard if and how they want to.
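For example, to expose the dashboard via a `NodePort` (a hedged sketch; the actual Service name and port depend on how the operator was installed):
```bash
# create an additional NodePort Service in front of the leader-selecting Service
kubectl expose service arango-deployment-operator \
  --type=NodePort --name=arango-operator-dashboard --port=8528
```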
### Readonly behavior
The dashboard only provides readonly functions.
When modifications to an `ArangoDeployment` are needed (e.g. when upgrading to a new version), the dashboard
will provide instructions for doing so using `kubectl` commands.
In doing so, the requirements for authentication & access control of the dashboard itself remain limited,
while all possible authentication & access control features of Kubernetes are still available to ensure
a secure deployment.
### Authentication
The dashboard requires a username+password to gain access, unless it is started with an option to disable authentication.
This username+password pair is stored in a standard basic authentication `Secret` in the Kubernetes cluster.
### Frontend technology
The frontend part of the dashboard will be built with React.
This aligns with future developments in the context of the web-UI of the database itself.
### Backend technology
The backend of the dashboard contains an HTTPS server that serves the dashboard webpage (including all required web resources)
and all API methods it needs.
### Dashboard UI is now deprecated and will be removed in the next minor version


@ -1,7 +1,7 @@
# Lifecycle hooks & Finalizers
The ArangoDB operator expects full control of the `Pods` and `PersistentVolumeClaims` it creates.
Therefore it takes measures to prevent the removal of those resources
Therefore, it takes measures to prevent the removal of those resources
until it is safe to do so.
To achieve this, the server containers in the `Pods` have
@ -27,11 +27,17 @@ is shared between the init-container and the server container.
## Finalizers
The ArangoDB operators adds the following finalizers to `Pods`.
The ArangoDB operator adds the following finalizers to `Pods`:
- `dbserver.database.arangodb.com/drain`: Added to DBServers, removed only when the dbserver can be restarted or is completely drained
- `agent.database.arangodb.com/agency-serving`: Added to Agents, removed only when enough agents are left to keep the agency serving
- `pod.database.arangodb.com/delay`: Delays pod termination
- `database.arangodb.com/graceful-shutdown`: Added to All members, indicating the need for graceful shutdown
The ArangoDB operators adds the following finalizers to `PersistentVolumeClaims`.
The ArangoDB operator adds the following finalizers to `PersistentVolumeClaims`:
- `pvc.database.arangodb.com/member-exists`: Removed only when its member no longer exists or can be safely rebuilt
- `pvc.database.arangodb.com/member-exists`: removed only when its member exists no longer exists or can be safely rebuild
The ArangoDB operator adds the following finalizers to `ArangoDeployment`:
- `database.arangodb.com/remove-child-finalizers`: Cleans up finalizers from all child resources
The ArangoDB operator adds the following finalizers to `ArangoDeploymentReplication`:
- `replication.database.arangodb.com/stop-sync`: Stops deployment-to-deployment replication
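To see which of these finalizers are currently present on a resource, a plain `kubectl` query is sufficient (a generic example, not operator-specific):
```bash
# print the finalizers of a member Pod and of its PersistentVolumeClaim
kubectl get pod <pod-name> -o jsonpath='{.metadata.finalizers}'
kubectl get pvc <pvc-name> -o jsonpath='{.metadata.finalizers}'
```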


@ -1,14 +0,0 @@
# Maintenance
## ArangoDeployment
Maintenance on ArangoDeployment can be enabled using annotation.
Key: `deployment.arangodb.com/maintenance`
Value: `true`
To enable maintenance mode for ArangoDeployment kubectl command can be used:
`kubectl annotate arangodeployment deployment deployment.arangodb.com/maintenance=true`
To disable maintenance mode for ArangoDeployment kubectl command can be used:
`kubectl annotate --overwrite arangodeployment deployment deployment.arangodb.com/maintenance-`


@ -3,13 +3,13 @@
All resources being created will get a name that contains
the user provided cluster name and a unique part.
The unique part will be difference for every pod that
The unique part will be different for every pod that
is being created.
E.g. when upgrading to a new version, we generate a new
unique pod name.
The servers in the ArangoDB cluster will be assigned
a persistent, unique ID.
a persistent, unique ID which is stored in ArangoMember CR.
When a Pod changes (e.g. because of an upgrade) the
Pod name changes, but the cluster ID remains the same.
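Assuming the `ArangoMember` custom resource is installed (resource name taken from its CRD), the persistent member IDs can be inspected with, for example:
```bash
# list member resources; member IDs are expected to stay stable across Pod re-creations
kubectl get arangomembers -n <namespace>
```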


@ -60,17 +60,8 @@ For a full cluster deployment, the following Kubernetes resources are created:
- `arangodb_deployment: <deployment-name>`
- `role: dbserver`
- Headless `Service` for accessing the all server, named `<deployment-name>_servers`.
The service will provide access all server server from within the k8s cluster.
- Labels:
- `app=arangodb`
- `arangodb_deployment: <deployment-name>`
- Selector:
- `app=arangodb`
- `arangodb_deployment: <deployment-name>`
- `Service` for accessing the all coordinators, named `<deployment-name>`.
The service will provide access all coordinators from within the k8s cluster.
- `Service` for accessing all coordinators, named `<deployment-name>`.
The service will provide access to all coordinators from within the k8s cluster.
- Labels:
- `app=arangodb`
- `arangodb_deployment: <deployment-name>`
@ -86,17 +77,17 @@ For a full cluster with datacenter replication deployment,
the same resources are created as for a Full cluster, with the following
additions:
- `Pods` running ArangoSync workers named `<deployment-name>_syncworker_<x>`.
- `Pods` running ArangoSync workers named `<deployment-name>-syncworker-<x>`.
- Labels:
- `app=arangodb`
- `arangodb_deployment: <deployment-name>`
- `role: syncworker`
- `Pods` running ArangoSync master named `<deployment-name>_syncmaster_<x>`.
- `Pods` running ArangoSync master named `<deployment-name>-syncmaster-<x>`.
- Labels:
- `app=arangodb`
- `arangodb_deployment: <deployment-name>`
- `role: syncmaster`
- `Service` for accessing the sync masters, named `<deployment-name>_sync`.
- `Service` for accessing the sync masters, named `<deployment-name>-sync`.
The service will provide access to all syncmaster from within the Kubernetes cluster.
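To list the Services that belong to one deployment, the labels above can be used as a selector (the deployment name is a placeholder):
```bash
kubectl get services -l app=arangodb,arangodb_deployment=<deployment-name>
```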


@ -1,28 +1,21 @@
# Scaling
The internal process followed by the ArangoDB operator
when scaling up is as follows:
The number of running servers is controlled through the `spec.<server_group>.count` field.
- Set CR state to `Scaling`
- Create an additional server Pod
- Wait until server is ready before continuing
- Set CR state to `Ready`
### Scale-up
When increasing the `count`, the operator will try to create the missing pods.
When scaling up, make sure that you have enough computational resources / nodes, otherwise the pods will be stuck in the Pending state.
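For example, scaling can be performed by patching the count of a server group (a hedged sketch, assuming a deployment named `cluster`):
```bash
kubectl patch arangodeployment cluster --type=merge \
  -p '{"spec":{"dbservers":{"count":5}}}'
```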
The internal process followed by the ArangoDB operator
when scaling down a dbserver is as follows:
- Set CR state to `Scaling`
- Drain the dbserver (TODO fill in procedure)
- Shutdown the dbserver such that it removes itself from the agency
- Remove the dbserver Pod
- Set CR state to `Ready`
### Scale-down
The internal process followed by the ArangoDB operator
when scaling down a coordinator is as follows:
Scaling down is always done 1 server at a time.
- Set CR state to `Scaling`
- Shutdown the coordinator such that it removes itself from the agency
- Remove the coordinator Pod
- Set CR state to `Ready`
Scale down is possible only when all other actions on ArangoDeployment are finished.
Note: Scaling is always done 1 server at a time.
The internal process followed by the ArangoDB operator when scaling down is as follows:
- It chooses a member to be evicted. First it will try to remove unhealthy members, or fall back to the member with the highest deletion_priority.
- Making internal calls, it forces the server to resign leadership.
In case of DB servers this means that all shard leaders will be switched to other servers.
- Wait until the server is cleaned out from the cluster
- The Pod is finalized


@ -1,33 +0,0 @@
# Status
The status field of the `CustomResource` must contain all persistent state needed to
create & maintain the cluster.
## `status.state: string`
This field contains the current status of the cluster.
Possible values are:
- `Creating` when the cluster is first being created.
- `Ready` when all pods of the cluster are in the running state.
- `Scaling` when pods are being added to an existing cluster or removed from an existing cluster.
- `Upgrading` when cluster is in the process of being upgraded to another version.
## `status.members.<group>.[x].state: string`
This field contains the pod state of server x of this group.
Possible values are:
- `Creating` when the pod is about to be created.
- `Ready` when the pod has been created.
- `Draining` when a dbserver pod is being drained.
- `ShuttingDown` when a server is in the process of shutting down.
## `status.members.<group>.[x].podName: string`
This field contains the name of the current pod that runs server x of this group.
## `status.members.<group>.[x].clusterID: string`
This field contains the unique cluster ID of server x of this group.
The field is only valid for groups `single`, `agents`, `dbservers` & `coordinators`.
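Assuming the field layout described above, the overall state could be read with a jsonpath query such as:
```bash
kubectl get arangodeployment <name> -o jsonpath='{.status.state}'
```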


@ -1,16 +0,0 @@
# Test clusters
The ArangoDB operator is tested on various types of kubernetes clusters.
To prepare a cluster for running the ArangoDB operator tests,
do the following:
- Create a `kubectl` config file for accessing the cluster.
- Use that config file.
- Run `./scripts/kube_configure_test_cluster.sh`. This creates a `ConfigMap`
named `arango-operator-test` in the `kube-system` namespace containing the
following environment variables.
```bash
REQUIRE_LOCAL_STORAGE=1
```
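To verify the resulting configuration, the ConfigMap can be inspected directly:
```bash
kubectl get configmap arango-operator-test -n kube-system -o yaml
```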


@ -1,40 +0,0 @@
# Testing
## Scenarios
The following test scenarios must be covered by automated tests:
- Creating 1 deployment (all modes, all environments, all storage engines)
- Creating multiple deployments (all modes, all environments, all storage engines),
controlling each individually
- Creating deployment with/without authentication
- Creating deployment with/without TLS
- Updating deployment wrt:
- Number of servers (scaling, up/down)
- Image version (upgrading, downgrading within same minor version range (e.g. 3.2.x))
- Immutable fields (should be reset automatically)
- Resilience:
- Delete individual pods
- Delete individual PVCs
- Delete individual Services
- Delete Node
- Restart Node
- API server unavailable
- Persistent Volumes:
- hint: RBAC file might need to be changed
- hint: get info via - client-go.CoreV1()
- Number of volumes should stay in reasonable bounds
- For some cases it might be possible to check that the amount before and after the test stays the same
- A Cluster start should need 6 Volumes (DBServer + Agents)
- The release of a volume-claim should result in a release of the volume
## Test environments
- Kubernetes clusters
- Single node
- Multi node
- Access control mode (RBAC, ...)
- Persistent volumes ...


@ -1,32 +0,0 @@
# Upgrade procedure
## Upgrading ArangoDB single to another version
The process for upgrading an existing ArangoDB single server
to another version is as follows:
- Set CR state to `Upgrading`
- Remove the server Pod (keep persistent volume)
- Create a new server Pod with new version
- Wait until server is ready before continuing
- Set CR state to `Ready`
## Upgrading ArangoDB cluster to another version
The process for upgrading an existing ArangoDB cluster
to another version is as follows:
- Set CR state to `Upgrading`
- For each agent:
- Remove the agent Pod (keep persistent volume)
- Create new agent Pod with new version
- Wait until agent is ready before continuing
- For each dbserver:
- Remove the dbserver Pod (keep persistent volume)
- Create new dbserver Pod with new version
- Wait until dbserver is ready before continuing
- For each coordinator:
- Remove the coordinator Pod (keep persistent volume)
- Create new coordinator Pod with new version
- Wait until coordinator is ready before continuing
- Set CR state to `Ready`
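In practice such an upgrade is triggered by changing `spec.image` of the ArangoDeployment; a hedged sketch (the version tag is illustrative):
```bash
kubectl patch arangodeployment <name> --type=merge \
  -p '{"spec":{"image":"arangodb/arangodb:3.11.4"}}'
```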

docs/features/README.md Normal file

@ -0,0 +1,41 @@
## List of Community Edition features
| Feature | Operator Version | Introduced | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks |
|:-----------------------------------------------------|:-----------------|:-----------|:-----------------|:-----------------|:-----------|:--------|:-----|:----------------------------------------------------------------------------|
| AgencyCache | 1.2.30 | 1.2.30 | >= 3.8.0 | Enterprise | Production | True | N/A | Enable Agency Cache mechanism in the Operator (Increase limit of the nodes) |
| Member Maintenance Support | 1.2.25 | 1.2.16 | >= 3.8.0 | Enterprise | Production | True | N/A | Enable Member Maintenance during planned restarts |
| [Rebalancer](rebalancer.md) | 1.2.15 | 1.2.5 | >= 3.8.0 | Enterprise | Production | True | N/A | N/A |
| [TopologyAwareness](../design/topology_awareness.md) | 1.2.4 | 1.2.4 | >= 3.8.0 | Enterprise | Production | True | N/A | N/A |
## List of Enterprise Edition features
| Feature | Operator Version | Introduced | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks |
|:----------------------------------------------------------------|:-----------------|:-----------|:-----------------|:----------------------|:-------------|:--------|:------------------------------------------------------|:-----------------------------------------------------------------------------------|
| Enforced ResignLeadership | 1.2.34 | 1.2.34 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.enforced-resign-leadership | Enforce ResignLeadership and ensure that Leaders are moved from restarted DBServer |
| Copy resources spec to init containers | 1.2.33 | 1.2.33 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.init-containers-copy-resources | Copy resources spec to built-in init containers if they are not specified |
| [Rebalancer V2](rebalancer_v2.md) | 1.2.31 | 1.2.31 | >= 3.10.0 | Community, Enterprise | Alpha | False | --deployment.feature.rebalancer-v2 | N/A |
| [Secured containers](secured_containers.md) | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.secured-containers | If set to True Operator will run containers in secure mode |
| Version Check V2 | 1.2.31 | 1.2.31 | >= 3.8.0 | Community, Enterprise | Alpha | False | --deployment.feature.upgrade-version-check-V2 | N/A |
| [Operator Ephemeral Volumes](ephemeral_volumes.md) | 1.2.31 | 1.2.2 | >= 3.8.0 | Community, Enterprise | Beta | False | --deployment.feature.ephemeral-volumes | N/A |
| [Force Rebuild Out Synced Shards](rebuild_out_synced_shards.md) | 1.2.27 | 1.2.27 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.force-rebuild-out-synced-shards | It should be used only if user is aware of the risks. |
| [Spec Default Restore](deployment_spec_defaults.md) | 1.2.25 | 1.2.21 | >= 3.8.0 | Community, Enterprise | Beta | True | --deployment.feature.deployment-spec-defaults-restore | If set to False Operator will not change ArangoDeployment Spec |
| Version Check | 1.2.23 | 1.1.4 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.upgrade-version-check | N/A |
| [Failover Leader service](failover_leader_service.md) | 1.2.13 | 1.2.13 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.failover-leadership | N/A |
| Graceful Restart | 1.2.5 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | ---deployment.feature.graceful-shutdown | N/A |
| Optional Graceful Restart | 1.2.0 | 1.2.5 | >= 3.8.0 | Community, Enterprise | Production | False | --deployment.feature.optional-graceful-shutdown | N/A |
| Operator Internal Metrics Exporter | 1.2.0 | 1.2.0 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.metrics-exporter | N/A |
| Operator Maintenance Management Support | 1.2.0 | 1.0.7 | >= 3.8.0 | Community, Enterprise | Production | True | --deployment.feature.maintenance | N/A |
| Encryption Key Rotation Support | 1.2.0 | 1.0.3 | >= 3.8.0 | Enterprise | NotSupported | False | --deployment.feature.encryption-rotation | N/A |
| TLS Runtime Rotation Support | 1.1.0 | 1.0.4 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-rotation | N/A |
| JWT Rotation Support | 1.1.0 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.jwt-rotation | N/A |
| Operator Single Mode | 1.0.4 | 1.0.4 | >= 3.8.0 | Community, Enterprise | Production | False | --mode.single | Only 1 instance of Operator allowed in namespace when feature is enabled |
| TLS SNI Support | 1.0.3 | 1.0.3 | >= 3.8.0 | Enterprise | Production | True | --deployment.feature.tls-sni | N/A |
| Disabling of liveness probes | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Pod Disruption Budgets | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Prometheus Metrics Exporter | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | Prometheus required |
| Sidecar Containers | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Volume Claim Templates | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
| Volume Resizing | 0.3.11 | 0.3.10 | >= 3.8.0 | Community, Enterprise | Production | True | N/A | N/A |
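The flags listed above are command-line arguments of the operator; when the operator is installed via the Helm chart they are typically passed through `operator.args` (a hedged sketch; whether boolean flags accept the `=true` form is an assumption):
```yaml
operator:
  args:
    # enable an opt-in feature flag
    - --deployment.feature.rebalancer-v2=true
```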


@ -15,7 +15,7 @@ Change Default settings of:
## Dependencies
- [Operator Ephemeral Volumes](./ephemeral_volumes.md) should be Enabled and Supported.
- [Operator Ephemeral Volumes](ephemeral_volumes.md) should be Enabled and Supported.
## How to use

docs/how-to/README.md Normal file

@ -0,0 +1,12 @@
## How-to...
- [Pass additional params to operator](additional_configuration.md)
- [Change architecture / enable ARM support](arch_change.md)
- [Configure timezone for cluster](configuring_tz.md)
- [Collect debug data for support case](debugging.md)
- [Configure logging](logging.md)
- [Enable maintenance mode](maintenance.md)
- [Start metrics collection and monitoring](metrics.md)
- [Override detected total memory](override_detected_memory.md)
- [Manually recover cluster if you still have volumes with data](recovery.md)
- [How to rotate Pod](rotate-pod.md)


@ -5,7 +5,7 @@
### Log level
To adjust logging level of the operator, you can use `operator.args` in chart template value
as described in [Additional configuration](./additional_configuration.md).
as described in [Additional configuration](additional_configuration.md).
For example, to set log level to `INFO` and `DEBUG` for `requests` package, you can use the following value:
```yaml


@ -0,0 +1,29 @@
# Maintenance mode
## ArangoDeployment maintenance
When enabled, the operator will pause the reconciliation loop for the specified ArangoDeployment.
Maintenance on an ArangoDeployment can be enabled using an annotation.
Key: `deployment.arangodb.com/maintenance`
Value: `true`
To enable maintenance mode for ArangoDeployment kubectl command can be used:
`kubectl annotate arangodeployment deployment deployment.arangodb.com/maintenance=true`
To disable maintenance mode for ArangoDeployment kubectl command can be used:
`kubectl annotate --overwrite arangodeployment deployment deployment.arangodb.com/maintenance-`
## Cluster maintenance
It is possible to put the ArangoDB cluster into [agency supervision mode](https://docs.arangodb.com/3.11/develop/http/cluster/#maintenance).
Use the `spec.database.maintenance` field of the ArangoDeployment CR to configure that:
```
spec:
# ...
database:
maintenance: true
```


@ -1,10 +1,10 @@
# Metrics
# Metrics collection
Operator provides metrics of its operations in a format supported by [Prometheus](https://prometheus.io/).
The metrics are exposed through HTTPS on port `8528` under path `/metrics`.
For a full list of available metrics, see [here](./../generated/metrics/README.md).
For a full list of available metrics, see [here](../generated/metrics/README.md).
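A quick way to verify that the endpoint responds is to port-forward to the leading operator pod and query it (a hedged example; the pod name and TLS handling depend on your installation):
```bash
kubectl port-forward pod/<operator-pod> 8528:8528
curl -k https://localhost:8528/metrics
```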
#### Contents
- [Integration with standard Prometheus installation (no TLS)](#Integration-with-standard-Prometheus-installation-no-TLS)


@ -1,4 +1,4 @@
# Resource Management
# Override detected total memory
## overrideDetectedTotalMemory


@ -1,10 +1,8 @@
# Rotation
## ArangoDeployment
# How to rotate Pod
Rotation of ArangoDeployment Pods can be triggered by Pod deletion or by an annotation (the safe way).
Using annotation Pods gonna be rotated one-by-one which will keep cluster alive.
Using the annotation is the preferred way to rotate Pods while keeping the cluster in a healthy state.
Key: `deployment.arangodb.com/rotate`
Value: `true`
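A hedged example of applying the annotation to a single member Pod (the pod name is a placeholder):
```bash
kubectl annotate pod <pod-name> deployment.arangodb.com/rotate=true
```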


@ -1,3 +0,0 @@
# Supported Providers
- [Amazon EKS](./eks/README.md)


@ -141,26 +141,25 @@ func Test_GenerateAPIDocs(t *testing.T) {
root := os.Getenv("ROOT")
require.NotEmpty(t, root)
generateDocs(t, map[string]map[string]interface{}{
docs := map[string]map[string]interface{}{
"ArangoDeployment.V1": {
"Spec": api.ArangoDeployment{}.Spec,
},
},
fmt.Sprintf("%s/pkg/apis/deployment/v1", root))
generateDocs(t, map[string]map[string]interface{}{
"ArangoMember.V1": {
"Spec": api.ArangoMember{}.Spec,
},
},
fmt.Sprintf("%s/pkg/apis/deployment/v1", root))
}
resultPaths := generateDocs(t, docs, fmt.Sprintf("%s/pkg/apis/deployment/v1", root))
generateIndex(t, resultPaths)
}
func generateDocs(t *testing.T, objects map[string]map[string]interface{}, paths ...string) {
func generateDocs(t *testing.T, objects map[string]map[string]interface{}, paths ...string) map[string]string {
root := os.Getenv("ROOT")
require.NotEmpty(t, root)
docs, fs := getDocs(t, paths...)
outPaths := make(map[string]string)
for object, sections := range objects {
t.Run(object, func(t *testing.T) {
@ -237,7 +236,10 @@ func generateDocs(t *testing.T, objects map[string]map[string]interface{}, paths
})
}
out, err := os.OpenFile(path.Join(root, "docs/api", fmt.Sprintf("%s.md", object)), os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
fileName := fmt.Sprintf("%s.md", object)
outPaths[object] = fileName
outPath := path.Join(root, "docs/api", fmt.Sprintf("%s.md", object))
out, err := os.OpenFile(outPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
require.NoError(t, err)
defer func() {
@ -254,6 +256,26 @@ func generateDocs(t *testing.T, objects map[string]map[string]interface{}, paths
}
})
}
return outPaths
}
func generateIndex(t *testing.T, apiDocs map[string]string) {
root := os.Getenv("ROOT")
require.NotEmpty(t, root)
outPath := path.Join(root, "docs/api/README.md")
out, err := os.OpenFile(outPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
require.NoError(t, err)
defer func() {
require.NoError(t, out.Close())
}()
write(t, out, "# Custom Resources API Reference\n\n")
for name, filePath := range apiDocs {
write(t, out, " - [%s](./%s)\n", name, filePath)
}
write(t, out, "\n")
}
func write(t *testing.T, out io.Writer, format string, args ...interface{}) {


@ -128,7 +128,7 @@ features:
- operatorVersion: 1.2.0
state: Production
- name: Operator Ephemeral Volumes
doc: docs/design/features/ephemeral_volumes.md
doc: docs/features/ephemeral_volumes.md
flag: --deployment.feature.ephemeral-volumes
enabled: false
releases:
@ -137,14 +137,14 @@ features:
- operatorVersion: 1.2.31
state: Beta
- name: Failover Leader service
doc: docs/design/features/failover_leader_service.md
doc: docs/features/failover_leader_service.md
flag: --deployment.feature.failover-leadership
enabled: false
releases:
- operatorVersion: 1.2.13
state: Production
- name: Spec Default Restore
doc: docs/design/features/deployment_spec_defaults.md
doc: docs/features/deployment_spec_defaults.md
flag: --deployment.feature.deployment-spec-defaults-restore
enabled: true
remarks: If set to False Operator will not change ArangoDeployment Spec
@ -154,7 +154,7 @@ features:
- operatorVersion: 1.2.25
state: Beta
- name: Force Rebuild Out Synced Shards
doc: docs/design/features/rebuild_out_synced_shards.md
doc: docs/features/rebuild_out_synced_shards.md
flag: --deployment.feature.force-rebuild-out-synced-shards
enabled: false
remarks: It should be used only if user is aware of the risks.
@ -162,7 +162,7 @@ features:
- operatorVersion: 1.2.27
state: Production
- name: Rebalancer
doc: docs/design/features/rebalancer.md
doc: docs/features/rebalancer.md
enabled: true
operatorEditions: Enterprise
arangoDBEditions: Enterprise
@ -172,7 +172,7 @@ features:
- operatorVersion: 1.2.15
state: Production
- name: Rebalancer V2
doc: docs/design/features/rebalancer_v2.md
doc: docs/features/rebalancer_v2.md
arangoDBVersion: ">= 3.10.0"
flag: --deployment.feature.rebalancer-v2
enabled: false
@ -180,7 +180,7 @@ features:
- operatorVersion: 1.2.31
state: Alpha
- name: Secured containers
doc: docs/design/features/secured_containers.md
doc: docs/features/secured_containers.md
flag: --deployment.feature.secured-containers
enabled: false
remarks: If set to True Operator will run containers in secure mode

internal/features_test.go Normal file

@ -0,0 +1,55 @@
//
// DISCLAIMER
//
// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
package internal
import (
"os"
"path"
"testing"
"github.com/stretchr/testify/require"
)
func Test_GenerateFeaturesIndex(t *testing.T) {
root := os.Getenv("ROOT")
require.NotEmpty(t, root)
outPath := path.Join(root, "docs/features/README.md")
out, err := os.OpenFile(outPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
require.NoError(t, err)
defer func() {
require.NoError(t, out.Close())
}()
const basePath = "docs/features"
write(t, out, "## List of Community Edition features\n")
section, err := GenerateReadmeFeatures(root, basePath, true)
require.NoError(t, err)
write(t, out, section)
write(t, out, "\n")
write(t, out, "## List of Enterprise Edition features\n")
section, err = GenerateReadmeFeatures(root, basePath, false)
require.NoError(t, err)
write(t, out, section)
write(t, out, "\n")
}


@ -24,6 +24,7 @@ import (
"fmt"
"os"
"path"
"path/filepath"
"sort"
"strings"
@ -109,13 +110,14 @@ func GenerateReadme(root string) error {
readmeSections["kubernetesVersionsTable"] = section
}
if section, err := GenerateReadmeFeatures(root, true); err != nil {
const basePath = ""
if section, err := GenerateReadmeFeatures(root, basePath, true); err != nil {
return err
} else {
readmeSections["featuresEnterpriseTable"] = section
}
if section, err := GenerateReadmeFeatures(root, false); err != nil {
if section, err := GenerateReadmeFeatures(root, basePath, false); err != nil {
return err
} else {
readmeSections["featuresCommunityTable"] = section
@ -134,7 +136,7 @@ func GenerateReadme(root string) error {
return nil
}
func GenerateReadmeFeatures(root string, eeOnly bool) (string, error) {
func GenerateReadmeFeatures(root, basePath string, eeOnly bool) (string, error) {
feature := md.NewColumn("Feature", md.ColumnLeftAlign)
introduced := md.NewColumn("Introduced", md.ColumnLeftAlign)
oVersion := md.NewColumn("Operator Version", md.ColumnLeftAlign)
@ -204,7 +206,12 @@ func GenerateReadmeFeatures(root string, eeOnly bool) (string, error) {
n := f.Name
if v := util.First(r.Doc, f.Doc); v != nil {
n = fmt.Sprintf("[%s](%s)", n, *v)
p, err := filepath.Rel(basePath, *v)
if err != nil {
return "", err
}
n = fmt.Sprintf("[%s](%s)", n, p)
}
if err := t.AddRow(map[md.Column]string{


@ -252,7 +252,7 @@ type DeploymentSpec struct {
// Architecture defines the list of supported architectures.
// First element on the list is marked as default architecture.
// +doc/link: Architecture Change|/docs/design/arch_change.md
// +doc/link: Architecture Change|/docs/how-to/arch_change.md
// +doc/type: []string
// +doc/default: ['amd64']
Architecture ArangoDeploymentArchitecture `json:"architecture,omitempty"`