kube-arangodb/tests/resilience_test.go

package tests

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/dchest/uniuri"
	"github.com/stretchr/testify/require"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	driver "github.com/arangodb/go-driver"
	api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
	"github.com/arangodb/kube-arangodb/pkg/client"
	"github.com/arangodb/kube-arangodb/pkg/util/retry"
)
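
// The helpers used below (longOrSkip, getNamespace, mustNewKubeClient,
// newDeployment, mustNewArangodDatabaseClient, waitUntilDeployment,
// deploymentIsReady, waitUntilClusterHealth, clusterHealthEqualsSpec,
// deferedCleanupDeployment, removeDeployment, maskAny) are not defined in
// this file; they are assumed to be shared utilities of the tests package.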

// TestResiliencePod tests handling of individual pod deletions.
func TestResiliencePod(t *testing.T) {
	longOrSkip(t)
	c := client.MustNewInCluster()
	kubecli := mustNewKubeClient(t)
	ns := getNamespace(t)
	// Prepare deployment config
	depl := newDeployment("test-pod-resilience-" + uniuri.NewLen(4))
	depl.Spec.Mode = api.NewMode(api.DeploymentModeCluster)
	depl.Spec.SetDefaults(depl.GetName()) // this must be last
	// Create deployment
	apiObject, err := c.DatabaseV1alpha().ArangoDeployments(ns).Create(depl)
	if err != nil {
		t.Fatalf("Create deployment failed: %v", err)
	}
	defer deferedCleanupDeployment(c, depl.GetName(), ns)
	// Wait for deployment to be ready
	if _, err = waitUntilDeployment(c, depl.GetName(), ns, deploymentIsReady()); err != nil {
		t.Fatalf("Deployment not running in time: %v", err)
	}
	// Create a database client
	ctx := context.Background()
	client := mustNewArangodDatabaseClient(ctx, kubecli, apiObject, t)
	// Wait for cluster to be completely ready
	if err := waitUntilClusterHealth(client, func(h driver.ClusterHealth) error {
		return clusterHealthEqualsSpec(h, apiObject.Spec)
	}); err != nil {
		t.Fatalf("Cluster not running in expected health in time: %v", err)
	}
	// Fetch latest status so we know all member details
	apiObject, err = c.DatabaseV1alpha().ArangoDeployments(ns).Get(depl.GetName(), metav1.GetOptions{})
	if err != nil {
		t.Fatalf("Failed to get deployment: %v", err)
	}
	// Delete one pod after the other
	apiObject.ForeachServerGroup(func(group api.ServerGroup, spec api.ServerGroupSpec, status *api.MemberStatusList) error {
		for _, m := range *status {
			// Get current pod so we can compare UID later
			originalPod, err := kubecli.CoreV1().Pods(ns).Get(m.PodName, metav1.GetOptions{})
			if err != nil {
				t.Fatalf("Failed to get pod %s: %v", m.PodName, err)
			}
			if err := kubecli.CoreV1().Pods(ns).Delete(m.PodName, &metav1.DeleteOptions{}); err != nil {
				t.Fatalf("Failed to delete pod %s: %v", m.PodName, err)
			}
			// Wait for pod to return with different UID
			op := func() error {
				pod, err := kubecli.CoreV1().Pods(ns).Get(m.PodName, metav1.GetOptions{})
				if err != nil {
					return maskAny(err)
				}
				if pod.GetUID() == originalPod.GetUID() {
					return fmt.Errorf("Still original pod")
				}
				return nil
			}
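			// retry.Retry is assumed to re-run op until it returns nil or the given
			// timeout (one minute here) expires, returning the last error on timeout.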
			if err := retry.Retry(op, time.Minute); err != nil {
				t.Fatalf("Pod did not restart: %v", err)
			}
			// Wait for cluster to be completely ready
			if err := waitUntilClusterHealth(client, func(h driver.ClusterHealth) error {
				return clusterHealthEqualsSpec(h, apiObject.Spec)
			}); err != nil {
				t.Fatalf("Cluster not running in expected health in time: %v", err)
			}
		}
		return nil
	}, &apiObject.Status)

	// Cleanup
	removeDeployment(c, depl.GetName(), ns)
}
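
// TestResiliencePod above and the PVC and service tests below repeat the same
// "delete, then wait until the object comes back with a new UID" pattern. The
// helper below is a sketch (not part of the original suite; the name
// objectRecreated is hypothetical) of how that pattern could be factored out
// using only identifiers already imported in this file.
func objectRecreated(original metav1.Object, get func() (metav1.Object, error)) error {
	op := func() error {
		current, err := get()
		if err != nil {
			return maskAny(err)
		}
		if current.GetUID() == original.GetUID() {
			// Same UID means Kubernetes still serves the old object.
			return fmt.Errorf("still original object")
		}
		return nil
	}
	// Poll for up to a minute, matching the timeout used by the tests.
	return retry.Retry(op, time.Minute)
}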

// TestResiliencePVC tests handling of individual PVC deletions.
func TestResiliencePVC(t *testing.T) {
	longOrSkip(t)
	c := client.MustNewInCluster()
	kubecli := mustNewKubeClient(t)
	ns := getNamespace(t)
	// Prepare deployment config
	depl := newDeployment("test-pvc-resilience-" + uniuri.NewLen(4))
	depl.Spec.Mode = api.NewMode(api.DeploymentModeCluster)
	depl.Spec.SetDefaults(depl.GetName()) // this must be last
	// Create deployment
	apiObject, err := c.DatabaseV1alpha().ArangoDeployments(ns).Create(depl)
	if err != nil {
		t.Fatalf("Create deployment failed: %v", err)
	}
	defer deferedCleanupDeployment(c, depl.GetName(), ns)
	// Wait for deployment to be ready
	if _, err = waitUntilDeployment(c, depl.GetName(), ns, deploymentIsReady()); err != nil {
		t.Fatalf("Deployment not running in time: %v", err)
	}
	// Create a database client
	ctx := context.Background()
	client := mustNewArangodDatabaseClient(ctx, kubecli, apiObject, t)
	// Wait for cluster to be completely ready
	if err := waitUntilClusterHealth(client, func(h driver.ClusterHealth) error {
		return clusterHealthEqualsSpec(h, apiObject.Spec)
	}); err != nil {
		t.Fatalf("Cluster not running in expected health in time: %v", err)
	}
	// Fetch latest status so we know all member details
	apiObject, err = c.DatabaseV1alpha().ArangoDeployments(ns).Get(depl.GetName(), metav1.GetOptions{})
	if err != nil {
		t.Fatalf("Failed to get deployment: %v", err)
	}
	// Delete one PVC after the other
	apiObject.ForeachServerGroup(func(group api.ServerGroup, spec api.ServerGroupSpec, status *api.MemberStatusList) error {
		if group == api.ServerGroupCoordinators {
			// Coordinators have no PVC
			return nil
		}
		for _, m := range *status {
			// Get current PVC so we can compare UID later
			originalPVC, err := kubecli.CoreV1().PersistentVolumeClaims(ns).Get(m.PersistentVolumeClaimName, metav1.GetOptions{})
			if err != nil {
				t.Fatalf("Failed to get pvc %s: %v", m.PersistentVolumeClaimName, err)
			}
			if err := kubecli.CoreV1().PersistentVolumeClaims(ns).Delete(m.PersistentVolumeClaimName, &metav1.DeleteOptions{}); err != nil {
				t.Fatalf("Failed to delete pvc %s: %v", m.PersistentVolumeClaimName, err)
			}
			// Wait for PVC to return with different UID
			op := func() error {
				pvc, err := kubecli.CoreV1().PersistentVolumeClaims(ns).Get(m.PersistentVolumeClaimName, metav1.GetOptions{})
				if err != nil {
					return maskAny(err)
				}
				if pvc.GetUID() == originalPVC.GetUID() {
					return fmt.Errorf("Still original pvc")
				}
				return nil
			}
			if err := retry.Retry(op, time.Minute); err != nil {
				t.Fatalf("PVC was not recreated: %v", err)
			}
			// Wait for cluster to be completely ready
			if err := waitUntilClusterHealth(client, func(h driver.ClusterHealth) error {
				return clusterHealthEqualsSpec(h, apiObject.Spec)
			}); err != nil {
				t.Fatalf("Cluster not running in expected health in time: %v", err)
			}
		}
		return nil
	}, &apiObject.Status)

	// Cleanup
	removeDeployment(c, depl.GetName(), ns)
}

// TestResiliencePVDBServer tests handling of entire PVs of dbservers being removed.
func TestResiliencePVDBServer(t *testing.T) {
	longOrSkip(t)
	c := client.MustNewInCluster()
	kubecli := mustNewKubeClient(t)
	ns := getNamespace(t)
	// Prepare deployment config
	depl := newDeployment("test-pv-prmr-resi-" + uniuri.NewLen(4))
	depl.Spec.Mode = api.NewMode(api.DeploymentModeCluster)
	depl.Spec.SetDefaults(depl.GetName()) // this must be last
	// Create deployment
	apiObject, err := c.DatabaseV1alpha().ArangoDeployments(ns).Create(depl)
	if err != nil {
		t.Fatalf("Create deployment failed: %v", err)
	}
	defer deferedCleanupDeployment(c, depl.GetName(), ns)
	// Wait for deployment to be ready
	if _, err = waitUntilDeployment(c, depl.GetName(), ns, deploymentIsReady()); err != nil {
		t.Fatalf("Deployment not running in time: %v", err)
	}
	// Create a database client
	ctx := context.Background()
	client := mustNewArangodDatabaseClient(ctx, kubecli, apiObject, t)
	// Wait for cluster to be completely ready
	if err := waitUntilClusterHealth(client, func(h driver.ClusterHealth) error {
		return clusterHealthEqualsSpec(h, apiObject.Spec)
	}); err != nil {
		t.Fatalf("Cluster not running in expected health in time: %v", err)
	}
	// Fetch latest status so we know all member details
	apiObject, err = c.DatabaseV1alpha().ArangoDeployments(ns).Get(depl.GetName(), metav1.GetOptions{})
	if err != nil {
		t.Fatalf("Failed to get deployment: %v", err)
	}
	// Delete one PV, PVC & pod after the other
	apiObject.ForeachServerGroup(func(group api.ServerGroup, spec api.ServerGroupSpec, status *api.MemberStatusList) error {
		if group != api.ServerGroupDBServers {
			// Agents cannot be replaced with a new ID.
			// Coordinators and sync masters/workers have no persistent storage.
			return nil
		}
		for i, m := range *status {
			// Only test the first 2 dbservers
			if i >= 2 {
				continue
			}
			// Get current PVC so we can compare UID later
			originalPVC, err := kubecli.CoreV1().PersistentVolumeClaims(ns).Get(m.PersistentVolumeClaimName, metav1.GetOptions{})
			if err != nil {
				t.Fatalf("Failed to get pvc %s: %v", m.PersistentVolumeClaimName, err)
			}
			// Get current PV
			pvName := originalPVC.Spec.VolumeName
			require.NotEmpty(t, pvName, "VolumeName of %s must be non-empty", originalPVC.GetName())
			// Delete PV
			if err := kubecli.CoreV1().PersistentVolumes().Delete(pvName, &metav1.DeleteOptions{}); err != nil {
				t.Fatalf("Failed to delete pv %s: %v", pvName, err)
			}
			// Delete PVC
			if err := kubecli.CoreV1().PersistentVolumeClaims(ns).Delete(m.PersistentVolumeClaimName, &metav1.DeleteOptions{}); err != nil {
				t.Fatalf("Failed to delete pvc %s: %v", m.PersistentVolumeClaimName, err)
			}
			// Delete Pod
			if err := kubecli.CoreV1().Pods(ns).Delete(m.PodName, &metav1.DeleteOptions{}); err != nil {
				t.Fatalf("Failed to delete pod %s: %v", m.PodName, err)
			}
			// Wait for cluster to be healthy again with the same number of
			// dbservers, but the current dbserver being replaced.
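			// With its volume gone the old member has lost its data, so the operator
			// is expected to drop it and add a replacement dbserver under a new ID
			// rather than restart the existing member; the predicate below checks both.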
			expectedDBServerCount := apiObject.Spec.DBServers.GetCount()
			unexpectedID := m.ID
			pred := func(depl *api.ArangoDeployment) error {
				if len(depl.Status.Members.DBServers) != expectedDBServerCount {
					return maskAny(fmt.Errorf("Expected %d dbservers, got %d", expectedDBServerCount, len(depl.Status.Members.DBServers)))
				}
				if depl.Status.Members.ContainsID(unexpectedID) {
					return maskAny(fmt.Errorf("Member %s should be gone", unexpectedID))
				}
				return nil
			}
			if _, err := waitUntilDeployment(c, apiObject.GetName(), ns, pred, time.Minute*5); err != nil {
				t.Fatalf("Deployment not ready in time: %v", err)
			}
			// Wait for cluster to be completely ready
			if err := waitUntilClusterHealth(client, func(h driver.ClusterHealth) error {
				return clusterHealthEqualsSpec(h, apiObject.Spec)
			}); err != nil {
				t.Fatalf("Cluster not running in expected health in time: %v", err)
			}
		}
		return nil
	}, &apiObject.Status)

	// Cleanup
	removeDeployment(c, depl.GetName(), ns)
}

// TestResilienceService tests handling of individual service deletions.
func TestResilienceService(t *testing.T) {
	longOrSkip(t)
	c := client.MustNewInCluster()
	kubecli := mustNewKubeClient(t)
	ns := getNamespace(t)
	// Prepare deployment config
	depl := newDeployment("test-service-resilience-" + uniuri.NewLen(4))
	depl.Spec.Mode = api.NewMode(api.DeploymentModeCluster)
	depl.Spec.SetDefaults(depl.GetName()) // this must be last
	// Create deployment
	apiObject, err := c.DatabaseV1alpha().ArangoDeployments(ns).Create(depl)
	if err != nil {
		t.Fatalf("Create deployment failed: %v", err)
	}
	defer deferedCleanupDeployment(c, depl.GetName(), ns)
	// Wait for deployment to be ready
	if _, err = waitUntilDeployment(c, depl.GetName(), ns, deploymentIsReady()); err != nil {
		t.Fatalf("Deployment not running in time: %v", err)
	}
	// Create a database client
	ctx := context.Background()
	client := mustNewArangodDatabaseClient(ctx, kubecli, apiObject, t)
	// Wait for cluster to be completely ready
	if err := waitUntilClusterHealth(client, func(h driver.ClusterHealth) error {
		return clusterHealthEqualsSpec(h, apiObject.Spec)
	}); err != nil {
		t.Fatalf("Cluster not running in expected health in time: %v", err)
	}
	// Fetch latest status so we know all member details
	apiObject, err = c.DatabaseV1alpha().ArangoDeployments(ns).Get(depl.GetName(), metav1.GetOptions{})
	if err != nil {
		t.Fatalf("Failed to get deployment: %v", err)
	}
	// Delete the database service
	// Get current service so we can compare UID later
	serviceName := apiObject.Status.ServiceName
	originalService, err := kubecli.CoreV1().Services(ns).Get(serviceName, metav1.GetOptions{})
	if err != nil {
		t.Fatalf("Failed to get service %s: %v", serviceName, err)
	}
	if err := kubecli.CoreV1().Services(ns).Delete(serviceName, &metav1.DeleteOptions{}); err != nil {
		t.Fatalf("Failed to delete service %s: %v", serviceName, err)
	}
	// Wait for service to return with different UID
	op := func() error {
		service, err := kubecli.CoreV1().Services(ns).Get(serviceName, metav1.GetOptions{})
		if err != nil {
			return maskAny(err)
		}
		if service.GetUID() == originalService.GetUID() {
			return fmt.Errorf("Still original service")
		}
		return nil
	}
	if err := retry.Retry(op, time.Minute); err != nil {
		t.Fatalf("Service was not recreated: %v", err)
	}
	// Wait for cluster to be completely ready
	if err := waitUntilClusterHealth(client, func(h driver.ClusterHealth) error {
		return clusterHealthEqualsSpec(h, apiObject.Spec)
	}); err != nil {
		t.Fatalf("Cluster not running in expected health in time: %v", err)
	}
	// Cleanup
	removeDeployment(c, depl.GetName(), ns)
}