1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Feature] Graceful Shutdown (#824)

This commit is contained in:
Adam Janikowski 2021-10-29 09:41:41 +02:00 committed by GitHub
parent 6c9802911f
commit 1f7d272b53
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
32 changed files with 497 additions and 146 deletions

View file

@ -4,6 +4,7 @@
- Split & Unify Lifecycle management functionality
- Drop support for ArangoDB <= 3.5 (versions already EOL)
- Add new admin commands to fetch agency dump and agency state
- Add Graceful shutdown as finalizer (supports kubectl delete)
## [1.2.4](https://github.com/arangodb/kube-arangodb/tree/1.2.4) (2021-10-22)
- Replace `beta.kubernetes.io/arch` Pod label with `kubernetes.io/arch` using Silent Rotation

6
go.mod
View file

@ -3,7 +3,7 @@ module github.com/arangodb/kube-arangodb
go 1.16
replace (
github.com/arangodb/go-driver => github.com/arangodb/go-driver v0.0.0-20210621075908-e7a6fa0cbd18
github.com/arangodb/go-driver => github.com/arangodb/go-driver v1.2.1
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring => github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.46.0
github.com/prometheus-operator/prometheus-operator/pkg/client => github.com/prometheus-operator/prometheus-operator/pkg/client v0.46.0
github.com/stretchr/testify => github.com/stretchr/testify v1.5.1
@ -25,8 +25,8 @@ replace (
require (
github.com/arangodb-helper/go-certificates v0.0.0-20180821055445-9fca24fc2680
github.com/arangodb/arangosync-client v0.7.0
github.com/arangodb/go-driver v0.0.0-20210621075908-e7a6fa0cbd18
github.com/arangodb/go-driver/v2 v2.0.0-20211001173946-eafa9b638e13
github.com/arangodb/go-driver v1.2.1
github.com/arangodb/go-driver/v2 v2.0.0-20211021031401-d92dcd5a4c83
github.com/arangodb/go-upgrade-rules v0.0.0-20180809110947-031b4774ff21
github.com/cenkalti/backoff v2.2.1+incompatible
github.com/dchest/uniuri v0.0.0-20160212164326-8902c56451e9

7
go.sum
View file

@ -44,8 +44,10 @@ github.com/arangodb/arangosync-client v0.7.0 h1:3vLOVnMyr5vGlPA0OHxJL9Wyy49JJwN0
github.com/arangodb/arangosync-client v0.7.0/go.mod h1:g+JcxH3C63wKaJPnPr9nggYoGbt/bYCWpfcRG0NSodY=
github.com/arangodb/go-driver v0.0.0-20210621075908-e7a6fa0cbd18 h1:3J0tqp5eQ8ptGOeeu7vo92RKf24bOA7MFy0z3uPiTWg=
github.com/arangodb/go-driver v0.0.0-20210621075908-e7a6fa0cbd18/go.mod h1:3NUekcRLpgheFIGEwcOvxilEW73MV1queNKW58k7sdc=
github.com/arangodb/go-driver/v2 v2.0.0-20211001173946-eafa9b638e13 h1:5egTRo3Met3xXUVj/Pbn1gXeY2C4bQZycJoHSnndfig=
github.com/arangodb/go-driver/v2 v2.0.0-20211001173946-eafa9b638e13/go.mod h1:X3uG4XbfQS35AjsFJLwNLyA6UZofNV5ufe2KoNxcMO0=
github.com/arangodb/go-driver v1.2.1 h1:HREDHhDmzdIWxHmfkfTESbYUnRjESjPh4WUuXq7FZa8=
github.com/arangodb/go-driver v1.2.1/go.mod h1:zdDkJJnCj8DAkfbtIjIXnsTrWIiy6VhP3Vy14p+uQeY=
github.com/arangodb/go-driver/v2 v2.0.0-20211021031401-d92dcd5a4c83 h1:PCbi3alUFastUw6InBKGEXqniveJJcQuMYspubJMRS8=
github.com/arangodb/go-driver/v2 v2.0.0-20211021031401-d92dcd5a4c83/go.mod h1:B8byYwvt1mDOQzpjiMuDTP5jOif/Y5dcEJtkdvPB7HY=
github.com/arangodb/go-upgrade-rules v0.0.0-20180809110947-031b4774ff21 h1:+W7D5ttxi/Ygh/39vialtypE23p9KI7P0J2qtoqUV4w=
github.com/arangodb/go-upgrade-rules v0.0.0-20180809110947-031b4774ff21/go.mod h1:RkPIG6JJ2pcJUoymc18NxAJGraZd+iAEVnOTDjZey/w=
github.com/arangodb/go-velocypack v0.0.0-20200318135517-5af53c29c67e h1:Xg+hGrY2LcQBbxd0ZFdbGSyRKTYMZCfBbw/pMJFOk1g=
@ -184,6 +186,7 @@ github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXP
github.com/golang-jwt/jwt v3.2.1+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY=
github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=

View file

@ -25,10 +25,16 @@ package main
import (
"context"
"io"
"net"
"os"
"path/filepath"
"strconv"
"time"
"github.com/arangodb/kube-arangodb/pkg/backup/utils"
"github.com/arangodb/kube-arangodb/pkg/util/retry"
"github.com/arangodb/kube-arangodb/pkg/version"
"github.com/spf13/cobra"
@ -54,6 +60,10 @@ var (
Run: cmdLifecyclePreStopRunFinalizer,
Hidden: true,
}
cmdLifecyclePreStopPort = &cobra.Command{
Use: "port",
Hidden: true,
}
cmdLifecycleCopy = &cobra.Command{
Use: "copy",
Run: cmdLifecycleCopyRun,
@ -68,7 +78,15 @@ var (
func init() {
cmdMain.AddCommand(cmdLifecycle)
cmdLifecyclePreStop.AddCommand(cmdLifecyclePreStopFinalizers)
var preStopPort cmdLifecyclePreStopRunPort
cmdLifecyclePreStopPort.RunE = preStopPort.run
f := cmdLifecyclePreStopPort.Flags()
f.DurationVar(&preStopPort.timeout, "timeout", 6*60*time.Minute, "PreStopTimeout")
cmdLifecyclePreStop.AddCommand(cmdLifecyclePreStopFinalizers, cmdLifecyclePreStopPort)
cmdLifecycle.AddCommand(cmdLifecyclePreStop)
cmdLifecycle.AddCommand(cmdLifecycleCopy)
@ -162,3 +180,63 @@ func cmdLifecycleCopyRun(cmd *cobra.Command, args []string) {
cliLog.Info().Msgf("Executable copied to %s", targetPath)
}
type cmdLifecyclePreStopRunPort struct {
timeout time.Duration
}
// Wait until port 8529 is closed.
func (c *cmdLifecyclePreStopRunPort) run(cmd *cobra.Command, args []string) error {
address := net.JoinHostPort("127.0.0.1", strconv.Itoa(k8sutil.ArangoPort))
// Get environment
namespace := os.Getenv(constants.EnvOperatorPodNamespace)
if len(namespace) == 0 {
cliLog.Fatal().Msgf("%s environment variable missing", constants.EnvOperatorPodNamespace)
}
name := os.Getenv(constants.EnvOperatorPodName)
if len(name) == 0 {
cliLog.Fatal().Msgf("%s environment variable missing", constants.EnvOperatorPodName)
}
// Create kubernetes client
kubecli, err := k8sutil.NewKubeClient()
if err != nil {
cliLog.Fatal().Err(err).Msg("Failed to create Kubernetes client")
}
pods := kubecli.CoreV1().Pods(namespace)
recentErrors := 0
return retry.NewTimeout(func() error {
conn, err := net.DialTimeout("tcp", address, 500*time.Millisecond)
if err != nil {
return retry.Interrput()
}
conn.Close()
p, err := pods.Get(context.Background(), name, metav1.GetOptions{})
if k8sutil.IsNotFound(err) {
cliLog.Warn().Msg("Pod not found")
return nil
} else if err != nil {
recentErrors++
cliLog.Error().Err(err).Msg("Failed to get pod")
if recentErrors > 20 {
cliLog.Fatal().Err(err).Msg("Too many recent errors")
return nil
}
} else {
// We got our pod
finalizers := utils.StringList(p.GetFinalizers())
if !finalizers.Has(constants.FinalizerPodGracefulShutdown) {
return retry.Interrput()
}
}
return nil
}).Timeout(125*time.Millisecond, c.timeout)
}

View file

@ -73,6 +73,8 @@ const (
ActionTypeShutdownMember ActionType = "ShutdownMember"
// ActionTypeResignLeadership causes a member to resign leadership.
ActionTypeResignLeadership ActionType = "ResignLeadership"
// ActionTypeKillMemberPod causes a pod to get delete request. It also waits until Delay finalizer will be removed.
ActionTypeKillMemberPod ActionType = "KillMemberPod"
// ActionTypeRotateMember causes a member to be shutdown and have it's pod removed.
ActionTypeRotateMember ActionType = "RotateMember"
// ActionTypeRotateStartMember causes a member to be shutdown and have it's pod removed. Do not wait to pod recover.

View file

@ -148,6 +148,8 @@ func (g ServerGroup) DefaultTerminationGracePeriod() time.Duration {
return time.Minute
case ServerGroupDBServers:
return time.Hour
case ServerGroupCoordinators:
return time.Hour
default:
return time.Second * 30
}

View file

@ -682,3 +682,16 @@ func (s *ServerGroupSpec) GetEntrypoint(defaultEntrypoint string) string {
return *s.Entrypoint
}
// GetShutdownDelay returns defined or default Group ShutdownDelay in seconds
func (s ServerGroupSpec) GetShutdownDelay(group ServerGroup) int {
if s.ShutdownDelay == nil {
switch group {
case ServerGroupCoordinators:
return 3
default:
return 0
}
}
return *s.ShutdownDelay
}

View file

@ -41,6 +41,7 @@ func (m MetricsMode) New() *MetricsMode {
return &m
}
// deprecated
func (m MetricsMode) GetMetricsEndpoint() string {
switch m {
case MetricsModeInternal:
@ -51,9 +52,12 @@ func (m MetricsMode) GetMetricsEndpoint() string {
}
const (
// deprecated
// MetricsModeExporter exporter mode for old exporter type
MetricsModeExporter MetricsMode = "exporter"
MetricsModeSidecar MetricsMode = "sidecar"
// deprecated
MetricsModeSidecar MetricsMode = "sidecar"
// deprecated
MetricsModeInternal MetricsMode = "internal"
)
@ -67,12 +71,14 @@ func (m *MetricsMode) Get() MetricsMode {
// MetricsSpec contains spec for arangodb exporter
type MetricsSpec struct {
Enabled *bool `json:"enabled,omitempty"`
Enabled *bool `json:"enabled,omitempty"`
// deprecated
Image *string `json:"image,omitempty"`
Authentication MetricsAuthenticationSpec `json:"authentication,omitempty"`
Resources v1.ResourceRequirements `json:"resources,omitempty"`
Mode *MetricsMode `json:"mode,omitempty"`
TLS *bool `json:"tls,omitempty"`
// deprecated
Mode *MetricsMode `json:"mode,omitempty"`
TLS *bool `json:"tls,omitempty"`
ServiceMonitor *MetricsServiceMonitorSpec `json:"serviceMonitor,omitempty"`
@ -100,11 +106,13 @@ func (s *MetricsSpec) IsEnabled() bool {
return util.BoolOrDefault(s.Enabled, false)
}
// deprecated
// HasImage returns whether a image was specified or not
func (s *MetricsSpec) HasImage() bool {
return s.Image != nil
}
// deprecated
// GetImage returns the Image or empty string
func (s *MetricsSpec) GetImage() string {
return util.StringOrDefault(s.Image)

View file

@ -73,6 +73,8 @@ const (
ActionTypeShutdownMember ActionType = "ShutdownMember"
// ActionTypeResignLeadership causes a member to resign leadership.
ActionTypeResignLeadership ActionType = "ResignLeadership"
// ActionTypeKillMemberPod causes a pod to get delete request. It also waits until Delay finalizer will be removed.
ActionTypeKillMemberPod ActionType = "KillMemberPod"
// ActionTypeRotateMember causes a member to be shutdown and have it's pod removed.
ActionTypeRotateMember ActionType = "RotateMember"
// ActionTypeRotateStartMember causes a member to be shutdown and have it's pod removed. Do not wait to pod recover.

View file

@ -148,6 +148,8 @@ func (g ServerGroup) DefaultTerminationGracePeriod() time.Duration {
return time.Minute
case ServerGroupDBServers:
return time.Hour
case ServerGroupCoordinators:
return time.Hour
default:
return time.Second * 30
}

View file

@ -0,0 +1,35 @@
//
// DISCLAIMER
//
// Copyright 2016-2021 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
package v2alpha1
const (
ServerGroupReservedContainerNameServer = "server"
ServerGroupReservedContainerNameExporter = "exporter"
)
func IsReservedServerGroupContainerName(name string) bool {
switch name {
case ServerGroupReservedContainerNameServer, ServerGroupReservedContainerNameExporter:
return true
default:
return false
}
}

View file

@ -682,3 +682,16 @@ func (s *ServerGroupSpec) GetEntrypoint(defaultEntrypoint string) string {
return *s.Entrypoint
}
// GetShutdownDelay returns defined or default Group ShutdownDelay in seconds
func (s ServerGroupSpec) GetShutdownDelay(group ServerGroup) int {
if s.ShutdownDelay == nil {
switch group {
case ServerGroupCoordinators:
return 3
default:
return 0
}
}
return *s.ShutdownDelay
}

View file

@ -890,67 +890,12 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
},
},
},
{
Name: "Agent Pod can not have metrics exporter",
ArangoDeployment: &api.ArangoDeployment{
Spec: api.DeploymentSpec{
Image: util.NewString(testImage),
Authentication: noAuthentication,
TLS: noTLS,
Metrics: metricsSpec,
},
},
Helper: func(t *testing.T, deployment *Deployment, testCase *testCaseStruct) {
deployment.status.last = api.DeploymentStatus{
Members: api.DeploymentStatusMembers{
Agents: api.MemberStatusList{
firstAgentStatus,
},
},
Images: createTestImages(false),
}
testCase.createTestPodData(deployment, api.ServerGroupAgents, firstAgentStatus)
testCase.ExpectedPod.ObjectMeta.Labels[k8sutil.LabelKeyArangoExporter] = testYes
},
ExpectedEvent: "member agent is created",
ExpectedPod: core.Pod{
Spec: core.PodSpec{
Volumes: []core.Volume{
k8sutil.CreateVolumeEmptyDir(k8sutil.ArangodVolumeName),
k8sutil.CreateVolumeWithSecret(k8sutil.ExporterJWTVolumeName, testExporterToken),
},
Containers: []core.Container{
{
Name: k8sutil.ServerContainerName,
Image: testImage,
Command: createTestCommandForAgent(firstAgentStatus.ID, false, false, false),
Ports: createTestPorts(),
VolumeMounts: []core.VolumeMount{
k8sutil.ArangodVolumeMount(),
},
Resources: emptyResources,
LivenessProbe: createTestLivenessProbe(httpProbe, false, "", k8sutil.ArangoPort),
ImagePullPolicy: core.PullIfNotPresent,
SecurityContext: securityContext.NewSecurityContext(),
},
testArangodbInternalExporterContainer(false, emptyResources),
},
RestartPolicy: core.RestartPolicyNever,
TerminationGracePeriodSeconds: &defaultAgentTerminationTimeout,
Hostname: testDeploymentName + "-" + api.ServerGroupAgentsString + "-" + firstAgentStatus.ID,
Subdomain: testDeploymentName + "-int",
Affinity: k8sutil.CreateAffinity(testDeploymentName, api.ServerGroupAgentsString,
false, ""),
},
},
},
{
Name: "DBserver Pod with internal metrics exporter",
ArangoDeployment: &api.ArangoDeployment{
Spec: api.DeploymentSpec{
Image: util.NewString(testImage),
Authentication: noAuthentication,
Authentication: authenticationSpec,
TLS: noTLS,
Metrics: metricsSpec,
},
@ -967,6 +912,12 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
testCase.createTestPodData(deployment, api.ServerGroupDBServers, firstDBServerStatus)
testCase.ExpectedPod.ObjectMeta.Labels[k8sutil.LabelKeyArangoExporter] = testYes
authorization, err := createTestToken(deployment, testCase, []string{"/_api/version"})
require.NoError(t, err)
testCase.ExpectedPod.Spec.Containers[0].LivenessProbe = createTestLivenessProbe(httpProbe, false,
authorization, k8sutil.ArangoPort)
},
ExpectedEvent: "member dbserver is created",
ExpectedPod: core.Pod{
@ -974,22 +925,23 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
Volumes: []core.Volume{
k8sutil.CreateVolumeEmptyDir(k8sutil.ArangodVolumeName),
k8sutil.CreateVolumeWithSecret(k8sutil.ExporterJWTVolumeName, testExporterToken),
k8sutil.CreateVolumeWithSecret(k8sutil.ClusterJWTSecretVolumeName, testJWTSecretName),
},
Containers: []core.Container{
{
Name: k8sutil.ServerContainerName,
Image: testImage,
Command: createTestCommandForDBServer(firstDBServerStatus.ID, false, false, false),
Command: createTestCommandForDBServer(firstDBServerStatus.ID, false, true, false),
Ports: createTestPorts(),
VolumeMounts: []core.VolumeMount{
k8sutil.ArangodVolumeMount(),
k8sutil.ClusterJWTVolumeMount(),
},
Resources: emptyResources,
LivenessProbe: createTestLivenessProbe(httpProbe, false, "", k8sutil.ArangoPort),
ImagePullPolicy: core.PullIfNotPresent,
SecurityContext: securityContext.NewSecurityContext(),
},
testArangodbInternalExporterContainer(false, emptyResources),
testArangodbInternalExporterContainer(false, true, emptyResources),
},
RestartPolicy: core.RestartPolicyNever,
TerminationGracePeriodSeconds: &defaultDBServerTerminationTimeout,
@ -1005,7 +957,7 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
ArangoDeployment: &api.ArangoDeployment{
Spec: api.DeploymentSpec{
Image: util.NewString(testImage),
Authentication: noAuthentication,
Authentication: authenticationSpec,
TLS: noTLS,
Metrics: api.MetricsSpec{
Enabled: util.NewBool(true),
@ -1029,6 +981,12 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
testCase.createTestPodData(deployment, api.ServerGroupDBServers, firstDBServerStatus)
testCase.ExpectedPod.ObjectMeta.Labels[k8sutil.LabelKeyArangoExporter] = testYes
authorization, err := createTestToken(deployment, testCase, []string{"/_api/version"})
require.NoError(t, err)
testCase.ExpectedPod.Spec.Containers[0].LivenessProbe = createTestLivenessProbe(httpProbe, false,
authorization, k8sutil.ArangoPort)
},
ExpectedEvent: "member dbserver is created",
ExpectedPod: core.Pod{
@ -1036,22 +994,23 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
Volumes: []core.Volume{
k8sutil.CreateVolumeEmptyDir(k8sutil.ArangodVolumeName),
k8sutil.CreateVolumeWithSecret(k8sutil.ExporterJWTVolumeName, testExporterToken),
k8sutil.CreateVolumeWithSecret(k8sutil.ClusterJWTSecretVolumeName, testJWTSecretName),
},
Containers: []core.Container{
{
Name: k8sutil.ServerContainerName,
Image: testImage,
Command: createTestCommandForDBServer(firstDBServerStatus.ID, false, false, false),
Command: createTestCommandForDBServer(firstDBServerStatus.ID, false, true, false),
Ports: createTestPorts(),
VolumeMounts: []core.VolumeMount{
k8sutil.ArangodVolumeMount(),
k8sutil.ClusterJWTVolumeMount(),
},
Resources: emptyResources,
LivenessProbe: createTestLivenessProbe(httpProbe, false, "", k8sutil.ArangoPort),
ImagePullPolicy: core.PullIfNotPresent,
SecurityContext: securityContext.NewSecurityContext(),
},
testArangodbInternalExporterContainer(false, k8sutil.ExtractPodResourceRequirement(resourcesUnfiltered)),
testArangodbInternalExporterContainer(false, true, k8sutil.ExtractPodResourceRequirement(resourcesUnfiltered)),
},
RestartPolicy: core.RestartPolicyNever,
TerminationGracePeriodSeconds: &defaultDBServerTerminationTimeout,
@ -1067,7 +1026,7 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
ArangoDeployment: &api.ArangoDeployment{
Spec: api.DeploymentSpec{
Image: util.NewString(testImage),
Authentication: noAuthentication,
Authentication: authenticationSpec,
TLS: noTLS,
Metrics: metricsSpec,
Lifecycle: api.LifecycleSpec{
@ -1087,6 +1046,12 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
testCase.createTestPodData(deployment, api.ServerGroupDBServers, firstDBServerStatus)
testCase.ExpectedPod.ObjectMeta.Labels[k8sutil.LabelKeyArangoExporter] = testYes
authorization, err := createTestToken(deployment, testCase, []string{"/_api/version"})
require.NoError(t, err)
testCase.ExpectedPod.Spec.Containers[0].LivenessProbe = createTestLivenessProbe(httpProbe, false,
authorization, k8sutil.ArangoPort)
},
config: Config{
OperatorImage: testImageOperator,
@ -1097,6 +1062,7 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
Volumes: []core.Volume{
k8sutil.CreateVolumeEmptyDir(k8sutil.ArangodVolumeName),
k8sutil.CreateVolumeWithSecret(k8sutil.ExporterJWTVolumeName, testExporterToken),
k8sutil.CreateVolumeWithSecret(k8sutil.ClusterJWTSecretVolumeName, testJWTSecretName),
k8sutil.LifecycleVolume(),
},
InitContainers: []core.Container{
@ -1106,19 +1072,19 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
{
Name: k8sutil.ServerContainerName,
Image: testImage,
Command: createTestCommandForDBServer(firstDBServerStatus.ID, false, false, false),
Command: createTestCommandForDBServer(firstDBServerStatus.ID, false, true, false),
Ports: createTestPorts(),
VolumeMounts: []core.VolumeMount{
k8sutil.ArangodVolumeMount(),
k8sutil.LifecycleVolumeMount(),
k8sutil.ClusterJWTVolumeMount(),
},
Resources: emptyResources,
Lifecycle: createTestLifecycle(),
LivenessProbe: createTestLivenessProbe(httpProbe, false, "", k8sutil.ArangoPort),
Lifecycle: createTestLifecycle(api.ServerGroupAgents),
ImagePullPolicy: core.PullIfNotPresent,
SecurityContext: securityContext.NewSecurityContext(),
},
testArangodbInternalExporterContainer(false, emptyResources),
testArangodbInternalExporterContainer(false, true, emptyResources),
},
RestartPolicy: core.RestartPolicyNever,
TerminationGracePeriodSeconds: &defaultDBServerTerminationTimeout,
@ -1134,7 +1100,7 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
ArangoDeployment: &api.ArangoDeployment{
Spec: api.DeploymentSpec{
Image: util.NewString(testImage),
Authentication: noAuthentication,
Authentication: authenticationSpec,
TLS: noTLS,
Metrics: metricsSpec,
},
@ -1151,6 +1117,12 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
testCase.createTestPodData(deployment, api.ServerGroupDBServers, firstDBServerStatus)
testCase.ExpectedPod.ObjectMeta.Labels[k8sutil.LabelKeyArangoExporter] = testYes
authorization, err := createTestToken(deployment, testCase, []string{"/_api/version"})
require.NoError(t, err)
testCase.ExpectedPod.Spec.Containers[0].LivenessProbe = createTestLivenessProbe(httpProbe, false,
authorization, k8sutil.ArangoPort)
},
config: Config{
OperatorImage: testImageOperator,
@ -1161,6 +1133,7 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
Volumes: []core.Volume{
k8sutil.CreateVolumeEmptyDir(k8sutil.ArangodVolumeName),
k8sutil.CreateVolumeWithSecret(k8sutil.ExporterJWTVolumeName, testExporterToken),
k8sutil.CreateVolumeWithSecret(k8sutil.ClusterJWTSecretVolumeName, testJWTSecretName),
},
InitContainers: []core.Container{
createTestLifecycleContainer(emptyResources),
@ -1170,19 +1143,19 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
{
Name: k8sutil.ServerContainerName,
Image: testImage,
Command: createTestCommandForDBServer(firstDBServerStatus.ID, false, false, false),
Command: createTestCommandForDBServer(firstDBServerStatus.ID, false, true, false),
Ports: createTestPorts(),
VolumeMounts: []core.VolumeMount{
k8sutil.ArangodVolumeMount(),
k8sutil.LifecycleVolumeMount(),
k8sutil.ClusterJWTVolumeMount(),
},
Resources: emptyResources,
Lifecycle: createTestLifecycle(),
LivenessProbe: createTestLivenessProbe(httpProbe, false, "", k8sutil.ArangoPort),
Lifecycle: createTestLifecycle(api.ServerGroupAgents),
ImagePullPolicy: core.PullIfNotPresent,
SecurityContext: securityContext.NewSecurityContext(),
},
testArangodbInternalExporterContainer(false, emptyResources),
testArangodbInternalExporterContainer(false, true, emptyResources),
},
RestartPolicy: core.RestartPolicyNever,
TerminationGracePeriodSeconds: &defaultDBServerTerminationTimeout,
@ -1326,14 +1299,14 @@ func TestEnsurePod_ArangoDB_Core(t *testing.T) {
runTestCases(t, testCases...)
}
func testArangodbInternalExporterContainer(secure bool, resources core.ResourceRequirements, ports ...int32) core.Container {
func testArangodbInternalExporterContainer(secure, auth bool, resources core.ResourceRequirements, ports ...int32) core.Container {
var port int32 = k8sutil.ArangoExporterPort
if len(ports) > 0 {
port = ports[0]
}
return core.Container{
c := core.Container{
Name: k8sutil.ExporterContainerName,
Image: testImage,
Command: createTestInternalExporterCommand(secure, port),
@ -1356,9 +1329,14 @@ func testArangodbInternalExporterContainer(secure bool, resources core.ResourceR
},
VolumeMounts: []core.VolumeMount{
k8sutil.LifecycleVolumeMount(),
k8sutil.ExporterJWTVolumeMount(),
},
}
if auth {
c.VolumeMounts = append(c.VolumeMounts, k8sutil.ExporterJWTVolumeMount())
}
return c
}
func createTestInternalExporterCommand(secure bool, port int32) []string {

View file

@ -157,7 +157,7 @@ func TestEnsurePod_ArangoDB_Encryption(t *testing.T) {
Image: testImage,
Command: createTestCommandForDBServer(firstDBServerStatus.ID, true, true, true),
Ports: createTestPorts(),
Lifecycle: createTestLifecycle(),
Lifecycle: createTestLifecycle(api.ServerGroupAgents),
LivenessProbe: createTestLivenessProbe(httpProbe, false, "", k8sutil.ArangoPort),
ImagePullPolicy: core.PullIfNotPresent,
SecurityContext: securityContext.NewSecurityContext(),
@ -171,7 +171,7 @@ func TestEnsurePod_ArangoDB_Encryption(t *testing.T) {
Resources: emptyResources,
},
func() core.Container {
c := testArangodbInternalExporterContainer(true, emptyResources)
c := testArangodbInternalExporterContainer(true, true, emptyResources)
c.VolumeMounts = append(c.VolumeMounts, k8sutil.TlsKeyfileVolumeMount())
return c
}(),

View file

@ -69,7 +69,6 @@ func TestEnsurePod_Metrics(t *testing.T) {
Spec: core.PodSpec{
Volumes: []core.Volume{
k8sutil.CreateVolumeEmptyDir(k8sutil.ArangodVolumeName),
k8sutil.CreateVolumeWithSecret(k8sutil.ExporterJWTVolumeName, testExporterToken),
},
Containers: []core.Container{
{
@ -85,7 +84,7 @@ func TestEnsurePod_Metrics(t *testing.T) {
ImagePullPolicy: core.PullIfNotPresent,
SecurityContext: securityContext.NewSecurityContext(),
},
testArangodbInternalExporterContainer(false, emptyResources, 9999),
testArangodbInternalExporterContainer(false, false, emptyResources, 9999),
},
RestartPolicy: core.RestartPolicyNever,
TerminationGracePeriodSeconds: &defaultDBServerTerminationTimeout,
@ -130,7 +129,6 @@ func TestEnsurePod_Metrics(t *testing.T) {
Spec: core.PodSpec{
Volumes: []core.Volume{
k8sutil.CreateVolumeEmptyDir(k8sutil.ArangodVolumeName),
k8sutil.CreateVolumeWithSecret(k8sutil.ExporterJWTVolumeName, testExporterToken),
},
Containers: []core.Container{
{
@ -146,7 +144,7 @@ func TestEnsurePod_Metrics(t *testing.T) {
ImagePullPolicy: core.PullIfNotPresent,
SecurityContext: securityContext.NewSecurityContext(),
},
testArangodbInternalExporterContainer(false, emptyResources),
testArangodbInternalExporterContainer(false, false, emptyResources),
},
RestartPolicy: core.RestartPolicyNever,
TerminationGracePeriodSeconds: &defaultDBServerTerminationTimeout,
@ -191,7 +189,6 @@ func TestEnsurePod_Metrics(t *testing.T) {
Spec: core.PodSpec{
Volumes: []core.Volume{
k8sutil.CreateVolumeEmptyDir(k8sutil.ArangodVolumeName),
k8sutil.CreateVolumeWithSecret(k8sutil.ExporterJWTVolumeName, testExporterToken),
},
Containers: []core.Container{
{
@ -217,7 +214,7 @@ func TestEnsurePod_Metrics(t *testing.T) {
ImagePullPolicy: core.PullIfNotPresent,
SecurityContext: securityContext.NewSecurityContext(),
},
testArangodbInternalExporterContainer(false, emptyResources),
testArangodbInternalExporterContainer(false, false, emptyResources),
},
RestartPolicy: core.RestartPolicyNever,
TerminationGracePeriodSeconds: &defaultDBServerTerminationTimeout,
@ -262,7 +259,6 @@ func TestEnsurePod_Metrics(t *testing.T) {
Spec: core.PodSpec{
Volumes: []core.Volume{
k8sutil.CreateVolumeEmptyDir(k8sutil.ArangodVolumeName),
k8sutil.CreateVolumeWithSecret(k8sutil.ExporterJWTVolumeName, testExporterToken),
},
Containers: []core.Container{
{
@ -288,7 +284,7 @@ func TestEnsurePod_Metrics(t *testing.T) {
ImagePullPolicy: core.PullIfNotPresent,
SecurityContext: securityContext.NewSecurityContext(),
},
testArangodbInternalExporterContainer(false, emptyResources),
testArangodbInternalExporterContainer(false, false, emptyResources),
},
RestartPolicy: core.RestartPolicyNever,
TerminationGracePeriodSeconds: &defaultAgentTerminationTimeout,

View file

@ -262,7 +262,7 @@ func TestEnsurePod_Sync_Master(t *testing.T) {
k8sutil.CreateEnvFieldPath(constants.EnvOperatorNodeNameArango, "spec.nodeName"),
},
ImagePullPolicy: core.PullIfNotPresent,
Lifecycle: createTestLifecycle(),
Lifecycle: createTestLifecycle(api.ServerGroupSyncMasters),
Resources: resourcesUnfiltered,
SecurityContext: securityContext.NewSecurityContext(),
VolumeMounts: []core.VolumeMount{
@ -356,7 +356,7 @@ func TestEnsurePod_Sync_Master(t *testing.T) {
},
Resources: emptyResources,
ImagePullPolicy: core.PullIfNotPresent,
Lifecycle: createTestLifecycle(),
Lifecycle: createTestLifecycle(api.ServerGroupSyncMasters),
SecurityContext: securityContext.NewSecurityContext(),
VolumeMounts: []core.VolumeMount{
k8sutil.LifecycleVolumeMount(),
@ -454,7 +454,7 @@ func TestEnsurePod_Sync_Worker(t *testing.T) {
k8sutil.CreateEnvFieldPath(constants.EnvOperatorNodeNameArango, "spec.nodeName"),
},
ImagePullPolicy: core.PullIfNotPresent,
Lifecycle: createTestLifecycle(),
Lifecycle: createTestLifecycle(api.ServerGroupSyncMasters),
Resources: k8sutil.ExtractPodResourceRequirement(resourcesUnfiltered),
SecurityContext: securityContext.NewSecurityContext(),
VolumeMounts: []core.VolumeMount{

View file

@ -124,6 +124,11 @@ func runTestCase(t *testing.T, testCase testCaseStruct) {
require.Equal(t, testCase.Features.EncryptionRotation, *features.EncryptionRotation().EnabledPointer())
*features.JWTRotation().EnabledPointer() = testCase.Features.JWTRotation
*features.TLSSNI().EnabledPointer() = testCase.Features.TLSSNI
if g := testCase.Features.Graceful; g != nil {
*features.GracefulShutdown().EnabledPointer() = *g
} else {
*features.GracefulShutdown().EnabledPointer() = features.GracefulShutdown().EnabledByDefault()
}
*features.TLSRotation().EnabledPointer() = testCase.Features.TLSRotation
}

View file

@ -79,6 +79,7 @@ const (
type testCaseFeatures struct {
TLSSNI, TLSRotation, JWTRotation, EncryptionRotation bool
Graceful *bool
}
type testCaseStruct struct {
@ -100,8 +101,12 @@ func createTestTLSVolume(serverGroupString, ID string) core.Volume {
k8sutil.CreateTLSKeyfileSecretName(testDeploymentName, serverGroupString, ID))
}
func createTestLifecycle() *core.Lifecycle {
lifecycle, _ := k8sutil.NewLifecycle()
func createTestLifecycle(group api.ServerGroup) *core.Lifecycle {
if group.IsArangosync() {
lifecycle, _ := k8sutil.NewLifecycleFinalizers()
return lifecycle
}
lifecycle, _ := k8sutil.NewLifecyclePort()
return lifecycle
}
@ -586,9 +591,14 @@ func finalizers(group api.ServerGroup) []string {
var finalizers []string
switch group {
case api.ServerGroupAgents:
finalizers = append(finalizers, constants.FinalizerPodAgencyServing)
finalizers = append(finalizers, constants.FinalizerPodGracefulShutdown)
case api.ServerGroupCoordinators:
finalizers = append(finalizers, constants.FinalizerDelayPodTermination)
finalizers = append(finalizers, constants.FinalizerPodGracefulShutdown)
case api.ServerGroupDBServers:
finalizers = append(finalizers, constants.FinalizerPodDrainDBServer)
finalizers = append(finalizers, constants.FinalizerPodGracefulShutdown)
case api.ServerGroupSingle:
finalizers = append(finalizers, constants.FinalizerPodGracefulShutdown)
}
return finalizers
@ -732,7 +742,7 @@ func addLifecycle(name string, uuidRequired bool, license string, group api.Serv
}
if len(p.Spec.Containers) > 0 {
p.Spec.Containers[0].Lifecycle = createTestLifecycle()
p.Spec.Containers[0].Lifecycle = createTestLifecycle(api.ServerGroupAgents)
}
if len(p.Spec.Containers) > 0 {

View file

@ -0,0 +1,40 @@
//
// DISCLAIMER
//
// Copyright 2016-2021 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Adam Janikowski
// Author Tomasz Mielech
//
package features
func init() {
registerFeature(gracefulShutdown)
}
var gracefulShutdown = &feature{
name: "graceful-shutdown",
description: "Define graceful shutdown, using finalizers, is enabled",
version: "3.6.0",
enterpriseRequired: false,
enabledByDefault: true,
}
func GracefulShutdown() Feature {
return gracefulShutdown
}

View file

@ -0,0 +1,111 @@
//
// DISCLAIMER
//
// Copyright 2020-2021 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
// Author Tomasz Mielech
//
package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/backup/utils"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
"github.com/arangodb/kube-arangodb/pkg/util/constants"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/rs/zerolog"
)
func init() {
registerAction(api.ActionTypeKillMemberPod, newKillMemberPodAction)
}
// newKillMemberPodAction creates a new Action that implements the given
// planned KillMemberPod action.
func newKillMemberPodAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action {
a := &actionKillMemberPod{}
a.actionImpl = newActionImplDefRef(log, action, actionCtx, defaultTimeout)
return a
}
// actionKillMemberPod implements an KillMemberPod.
type actionKillMemberPod struct {
// actionImpl implement timeout and member id functions
actionImpl
}
// Start performs the start of the action.
// Returns true if the action is completely finished, false in case
// the start time needs to be recorded and a ready condition needs to be checked.
func (a *actionKillMemberPod) Start(ctx context.Context) (bool, error) {
if !features.GracefulShutdown().Enabled() {
return true, nil
}
log := a.log
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
if !ok {
log.Error().Msg("No such member")
return true, nil
}
if err := a.actionCtx.DeletePod(ctx, m.PodName); err != nil {
log.Error().Err(err).Msg("Unable to kill pod")
return true, nil
}
return false, nil
}
// CheckProgress checks the progress of the action.
// Returns: ready, abort, error.
func (a *actionKillMemberPod) CheckProgress(ctx context.Context) (bool, bool, error) {
if !features.GracefulShutdown().Enabled() {
return true, false, nil
}
log := a.log
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
if !ok {
log.Error().Msg("No such member")
return true, false, nil
}
p, ok := a.actionCtx.GetCachedStatus().Pod(m.PodName)
if !ok {
log.Error().Msg("No such member")
return true, false, nil
}
l := utils.StringList(p.Finalizers)
if !l.Has(constants.FinalizerPodGracefulShutdown) {
return true, false, nil
}
if l.Has(constants.FinalizerDelayPodTermination) {
return false, false, nil
}
return true, false, nil
}

View file

@ -26,6 +26,8 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/util/arangod"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
@ -34,6 +36,10 @@ import (
)
func getShutdownHelper(a *api.Action, ctx ActionContext, log zerolog.Logger) ActionCore {
if features.GracefulShutdown().Enabled() {
return shutdownHelperAPI{action: a, actionCtx: ctx, log: log}
}
serverGroup := ctx.GetSpec().GetServerGroupSpec(a.Group)
switch serverGroup.ShutdownMethod.Get() {
@ -66,9 +72,12 @@ func (s shutdownHelperAPI) Start(ctx context.Context) (bool, error) {
return true, nil
}
// Remove finalizers, so Kubernetes will quickly terminate the pod
if err := s.actionCtx.RemovePodFinalizers(ctx, m.PodName); err != nil {
return false, errors.WithStack(err)
if !features.GracefulShutdown().Enabled() {
if err := s.actionCtx.RemovePodFinalizers(ctx, m.PodName); err != nil {
return false, errors.WithStack(err)
}
}
if group.IsArangod() {
// Invoke shutdown endpoint
ctxChild, cancel := context.WithTimeout(ctx, arangod.GetRequestTimeout())
@ -82,7 +91,7 @@ func (s shutdownHelperAPI) Start(ctx context.Context) (bool, error) {
log.Debug().Bool("removeFromCluster", removeFromCluster).Msg("Shutting down member")
ctxChild, cancel = context.WithTimeout(ctx, shutdownTimeout)
defer cancel()
if err := c.Shutdown(ctxChild, removeFromCluster); err != nil {
if err := c.ShutdownV2(ctxChild, removeFromCluster, true); err != nil {
// Shutdown failed. Let's check if we're already done
if ready, _, err := s.CheckProgress(ctxChild); err == nil && ready {
// We're done

View file

@ -61,6 +61,7 @@ func cleanOutMember(group api.ServerGroup, m api.MemberStatus) api.Plan {
)
}
plan = append(plan,
api.NewAction(api.ActionTypeKillMemberPod, group, m.ID),
api.NewAction(api.ActionTypeShutdownMember, group, m.ID),
api.NewAction(api.ActionTypeRemoveMember, group, m.ID),
)

View file

@ -83,6 +83,7 @@ func createRotateServerStoragePlan(ctx context.Context,
api.NewAction(api.ActionTypeMarkToRemoveMember, group, m.ID))
} else if group == api.ServerGroupAgents {
plan = append(plan,
api.NewAction(api.ActionTypeKillMemberPod, group, m.ID),
api.NewAction(api.ActionTypeShutdownMember, group, m.ID),
api.NewAction(api.ActionTypeRemoveMember, group, m.ID),
api.NewAction(api.ActionTypeAddMember, group, m.ID),
@ -187,6 +188,7 @@ func pvcResizePlan(log zerolog.Logger, group api.ServerGroup, groupSpec api.Serv
case api.PVCResizeModeRotate:
return api.Plan{
api.NewAction(api.ActionTypeResignLeadership, group, memberID),
api.NewAction(api.ActionTypeKillMemberPod, group, memberID),
api.NewAction(api.ActionTypeRotateStartMember, group, memberID),
api.NewAction(api.ActionTypePVCResize, group, memberID),
api.NewAction(api.ActionTypePVCResized, group, memberID),

View file

@ -507,11 +507,13 @@ func TestCreatePlanActiveFailoverScale(t *testing.T) {
}
newPlan, changed = createNormalPlan(ctx, log, depl, nil, spec, status, inspector.NewEmptyInspector(), c)
assert.True(t, changed)
require.Len(t, newPlan, 2) // Note: Downscaling is only down 1 at a time
assert.Equal(t, api.ActionTypeShutdownMember, newPlan[0].Type)
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[1].Type)
require.Len(t, newPlan, 3) // Note: Downscaling is only down 1 at a time
assert.Equal(t, api.ActionTypeKillMemberPod, newPlan[0].Type)
assert.Equal(t, api.ActionTypeShutdownMember, newPlan[1].Type)
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[2].Type)
assert.Equal(t, api.ServerGroupSingle, newPlan[0].Group)
assert.Equal(t, api.ServerGroupSingle, newPlan[1].Group)
assert.Equal(t, api.ServerGroupSingle, newPlan[2].Group)
}
// TestCreatePlanClusterScale creates a `cluster` deployment to test the creating of scaling plan.
@ -609,17 +611,21 @@ func TestCreatePlanClusterScale(t *testing.T) {
spec.Coordinators.Count = util.NewInt(1)
newPlan, changed = createNormalPlan(ctx, log, depl, nil, spec, status, inspector.NewEmptyInspector(), c)
assert.True(t, changed)
require.Len(t, newPlan, 5) // Note: Downscaling is done 1 at a time
require.Len(t, newPlan, 7) // Note: Downscaling is done 1 at a time
assert.Equal(t, api.ActionTypeCleanOutMember, newPlan[0].Type)
assert.Equal(t, api.ActionTypeShutdownMember, newPlan[1].Type)
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[2].Type)
assert.Equal(t, api.ActionTypeShutdownMember, newPlan[3].Type)
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[4].Type)
assert.Equal(t, api.ActionTypeKillMemberPod, newPlan[1].Type)
assert.Equal(t, api.ActionTypeShutdownMember, newPlan[2].Type)
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[3].Type)
assert.Equal(t, api.ActionTypeKillMemberPod, newPlan[4].Type)
assert.Equal(t, api.ActionTypeShutdownMember, newPlan[5].Type)
assert.Equal(t, api.ActionTypeRemoveMember, newPlan[6].Type)
assert.Equal(t, api.ServerGroupDBServers, newPlan[0].Group)
assert.Equal(t, api.ServerGroupDBServers, newPlan[1].Group)
assert.Equal(t, api.ServerGroupDBServers, newPlan[2].Group)
assert.Equal(t, api.ServerGroupCoordinators, newPlan[3].Group)
assert.Equal(t, api.ServerGroupDBServers, newPlan[3].Group)
assert.Equal(t, api.ServerGroupCoordinators, newPlan[4].Group)
assert.Equal(t, api.ServerGroupCoordinators, newPlan[5].Group)
assert.Equal(t, api.ServerGroupCoordinators, newPlan[6].Group)
}
type LastLogRecord struct {
@ -792,6 +798,7 @@ func TestCreatePlan(t *testing.T) {
ad.Status.Members.Agents[0].PersistentVolumeClaimName = pvcName
},
ExpectedPlan: []api.Action{
api.NewAction(api.ActionTypeKillMemberPod, api.ServerGroupAgents, ""),
api.NewAction(api.ActionTypeShutdownMember, api.ServerGroupAgents, ""),
api.NewAction(api.ActionTypeRemoveMember, api.ServerGroupAgents, ""),
api.NewAction(api.ActionTypeAddMember, api.ServerGroupAgents, ""),
@ -988,6 +995,7 @@ func TestCreatePlan(t *testing.T) {
},
ExpectedPlan: []api.Action{
api.NewAction(api.ActionTypeCleanOutMember, api.ServerGroupDBServers, "id"),
api.NewAction(api.ActionTypeKillMemberPod, api.ServerGroupDBServers, ""),
api.NewAction(api.ActionTypeShutdownMember, api.ServerGroupDBServers, ""),
api.NewAction(api.ActionTypeRemoveMember, api.ServerGroupDBServers, ""),
},
@ -1006,6 +1014,7 @@ func TestCreatePlan(t *testing.T) {
},
ExpectedPlan: []api.Action{
api.NewAction(api.ActionTypeCleanOutMember, api.ServerGroupDBServers, "id"),
api.NewAction(api.ActionTypeKillMemberPod, api.ServerGroupDBServers, ""),
api.NewAction(api.ActionTypeShutdownMember, api.ServerGroupDBServers, ""),
api.NewAction(api.ActionTypeRemoveMember, api.ServerGroupDBServers, ""),
},

View file

@ -44,6 +44,7 @@ func createRotateMemberPlan(log zerolog.Logger, member api.MemberStatus,
plan := api.Plan{
api.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, group, member.ID, "Remove server keyfile and enforce renewal/recreation"),
api.NewAction(api.ActionTypeResignLeadership, group, member.ID, reason),
api.NewAction(api.ActionTypeKillMemberPod, group, member.ID, reason),
api.NewAction(api.ActionTypeRotateMember, group, member.ID, reason),
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID),
api.NewAction(api.ActionTypeWaitForMemberInSync, group, member.ID),

View file

@ -27,6 +27,8 @@ import (
"math"
"os"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
"github.com/arangodb/kube-arangodb/pkg/deployment/topology"
"github.com/arangodb/kube-arangodb/pkg/util/collection"
@ -193,7 +195,10 @@ func (a *ArangoDContainer) GetResourceRequirements() core.ResourceRequirements {
}
func (a *ArangoDContainer) GetLifecycle() (*core.Lifecycle, error) {
return k8sutil.NewLifecycle()
if features.GracefulShutdown().Enabled() {
return k8sutil.NewLifecyclePort()
}
return k8sutil.NewLifecycleFinalizers()
}
func (a *ArangoDContainer) GetImagePullPolicy() core.PullPolicy {
@ -348,7 +353,7 @@ func (m *MemberArangoDPod) GetVolumes() ([]core.Volume, []core.VolumeMount) {
if m.spec.Metrics.IsEnabled() {
token := m.spec.Metrics.GetJWTTokenSecretName()
if token != "" {
if m.spec.Authentication.IsAuthenticated() && token != "" {
vol := k8sutil.CreateVolumeWithSecret(k8sutil.ExporterJWTVolumeName, token)
volumes.AddVolume(vol)
}
@ -460,15 +465,20 @@ func (m *MemberArangoDPod) GetInitContainers(cachedStatus interfaces.Inspector)
func (m *MemberArangoDPod) GetFinalizers() []string {
var finalizers []string
if d := m.spec.GetServerGroupSpec(m.group).ShutdownDelay; d != nil {
if d := m.spec.GetServerGroupSpec(m.group).GetShutdownDelay(m.group); d != 0 {
finalizers = append(finalizers, constants.FinalizerDelayPodTermination)
}
switch m.group {
case api.ServerGroupAgents:
finalizers = append(finalizers, constants.FinalizerPodAgencyServing)
case api.ServerGroupDBServers:
finalizers = append(finalizers, constants.FinalizerPodDrainDBServer)
if features.GracefulShutdown().Enabled() {
finalizers = append(finalizers, constants.FinalizerPodGracefulShutdown) // No need for other finalizers, quorum will be managed
} else {
switch m.group {
case api.ServerGroupAgents:
finalizers = append(finalizers, constants.FinalizerPodAgencyServing)
case api.ServerGroupDBServers:
finalizers = append(finalizers, constants.FinalizerPodDrainDBServer)
}
}
return finalizers
@ -502,7 +512,7 @@ func (m *MemberArangoDPod) createMetricsExporterSidecarInternalExporter() (*core
return nil, err
}
if m.spec.Metrics.GetJWTTokenSecretName() != "" {
if m.spec.Authentication.IsAuthenticated() && m.spec.Metrics.GetJWTTokenSecretName() != "" {
c.VolumeMounts = append(c.VolumeMounts, k8sutil.ExporterJWTVolumeMount())
}

View file

@ -113,7 +113,7 @@ func (a *ArangoSyncContainer) GetResourceRequirements() core.ResourceRequirement
}
func (a *ArangoSyncContainer) GetLifecycle() (*core.Lifecycle, error) {
return k8sutil.NewLifecycle()
return k8sutil.NewLifecycleFinalizers()
}
func (a *ArangoSyncContainer) GetImagePullPolicy() core.PullPolicy {

View file

@ -78,6 +78,12 @@ func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatu
} else {
log.Debug().Err(err).Str("finalizer", f).Msg("Cannot remove Pod finalizer yet")
}
case constants.FinalizerPodGracefulShutdown:
// We are in graceful shutdown, only one way to remove it is when container is already dead
if isServerContainerDead {
log.Debug().Msg("Server Container is dead, removing finalizer")
removalList = append(removalList, f)
}
case constants.FinalizerDelayPodTermination:
if isServerContainerDead {
log.Debug().Msg("Server Container is dead, removing finalizer")
@ -93,11 +99,12 @@ func (r *Resources) runPodFinalizers(ctx context.Context, p *v1.Pod, memberStatu
log.Error().Str("finalizer", f).Msg("Delay finalizer")
groupSpec := r.context.GetSpec().GetServerGroupSpec(group)
d := time.Duration(util.IntOrDefault(groupSpec.ShutdownDelay, 0)) * time.Second
if t := p.ObjectMeta.DeletionTimestamp; t != nil {
e := p.ObjectMeta.DeletionTimestamp.Time.Sub(time.Now().Add(d))
log.Error().Str("finalizer", f).Dur("left", e).Msg("Delay finalizer status")
if e < 0 {
d := time.Duration(groupSpec.GetShutdownDelay(group)) * time.Second
gr := time.Duration(util.Int64OrDefault(p.ObjectMeta.GetDeletionGracePeriodSeconds(), 0)) * time.Second
e := t.Time.Add(-1 * gr).Sub(time.Now().Add(-1 * d))
log.Error().Str("finalizer", f).Str("left", e.String()).Msg("Delay finalizer status")
if e < 0 || d == 0 {
removalList = append(removalList, f)
}
} else {

View file

@ -25,6 +25,8 @@ package rotation
import (
"github.com/arangodb/kube-arangodb/pkg/apis/deployment"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/backup/utils"
"github.com/arangodb/kube-arangodb/pkg/util/constants"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
"github.com/rs/zerolog"
@ -52,17 +54,7 @@ func (m Mode) And(b Mode) Mode {
// CheckPossible returns true if rotation is possible
func CheckPossible(member api.MemberStatus) bool {
if !member.Phase.IsReady() {
// Skip rotation when we are not ready
return false
}
if member.Conditions.IsTrue(api.ConditionTypeTerminated) || member.Conditions.IsTrue(api.ConditionTypeTerminating) {
// Termination in progress, nothing to do
return false
}
return true
return !member.Conditions.IsTrue(api.ConditionTypeTerminated)
}
func IsRotationRequired(log zerolog.Logger, cachedStatus inspectorInterface.Inspector, spec api.DeploymentSpec, member api.MemberStatus, group api.ServerGroup, pod *core.Pod, specTemplate, statusTemplate *api.ArangoMemberPodTemplate) (mode Mode, plan api.Plan, reason string, err error) {
@ -71,6 +63,16 @@ func IsRotationRequired(log zerolog.Logger, cachedStatus inspectorInterface.Insp
// Set default mode for return value
mode = SkippedRotation
// We are under termination
if member.Conditions.IsTrue(api.ConditionTypeTerminating) || (pod != nil && pod.DeletionTimestamp != nil) {
if l := utils.StringList(pod.Finalizers); l.Has(constants.FinalizerPodGracefulShutdown) && !l.Has(constants.FinalizerDelayPodTermination) {
reason = "Recreation enforced by deleted state"
mode = EnforcedRotation
}
return
}
if !CheckPossible(member) {
// Check is not possible due to improper state of member
return

View file

@ -50,6 +50,7 @@ const (
FinalizerDeplReplStopSync = "replication.database.arangodb.com/stop-sync" // Finalizer added to ArangoDeploymentReplication, indicating the need to stop synchronization
FinalizerPodAgencyServing = "agent.database.arangodb.com/agency-serving" // Finalizer added to Agents, indicating the need for keeping enough agents alive
FinalizerPodDrainDBServer = "dbserver.database.arangodb.com/drain" // Finalizer added to DBServers, indicating the need for draining that dbserver
FinalizerPodGracefulShutdown = "database.arangodb.com/graceful-shutdown" // Finalizer added to All members, indicating the need for graceful shutdown
FinalizerPVCMemberExists = "pvc.database.arangodb.com/member-exists" // Finalizer added to PVCs, indicating the need to keep is as long as its member exists
FinalizerDelayPodTermination = "pod.database.arangodb.com/delay" // Finalizer added to Pod, delays termination

View file

@ -62,8 +62,18 @@ func InitLifecycleContainer(image string, resources *core.ResourceRequirements,
return c, nil
}
// NewLifecycleFinalizers creates a lifecycle structure with preStop handler which wait for finalizers to be removed.
func NewLifecycleFinalizers() (*core.Lifecycle, error) {
return NewLifecycle("finalizers")
}
// NewLifecyclePort creates a lifecycle structure with preStop handler which wait for port to be closed.
func NewLifecyclePort() (*core.Lifecycle, error) {
return NewLifecycle("port")
}
// NewLifecycle creates a lifecycle structure with preStop handler.
func NewLifecycle() (*core.Lifecycle, error) {
func NewLifecycle(t string) (*core.Lifecycle, error) {
binaryPath, err := os.Executable()
if err != nil {
return nil, errors.WithStack(err)
@ -72,7 +82,7 @@ func NewLifecycle() (*core.Lifecycle, error) {
lifecycle := &core.Lifecycle{
PreStop: &core.Handler{
Exec: &core.ExecAction{
Command: append([]string{exePath}, "lifecycle", "preStop"),
Command: append([]string{exePath}, "lifecycle", "preStop", t),
},
},
}

View file

@ -154,11 +154,11 @@ func IsContainerRunning(pod *core.Pod, name string) bool {
continue
}
if c.State.Terminated != nil {
if c.State.Running == nil {
return false
} else {
return true
}
return true
}
return false
}