1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Feature] [TG-165] Kill pod forcefully (#896)

This commit is contained in:
Tomasz Mielech 2022-01-31 12:37:10 +01:00 committed by GitHub
parent b296b837d4
commit c8a0920c16
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 227 additions and 158 deletions

View file

@ -4,6 +4,7 @@
- Do not check License V2 on Community images
- Add status.members.<group>.
- Define MemberReplacementRequired condition
- Remove pod immediately when annotation is turned on
## [1.2.7](https://github.com/arangodb/kube-arangodb/tree/1.2.7) (2022-01-17)
- Add Plan BackOff functionality

View file

@ -25,5 +25,6 @@ const (
ArangoDeploymentPodMaintenanceAnnotation = ArangoDeploymentAnnotationPrefix + "/maintenance"
ArangoDeploymentPodRotateAnnotation = ArangoDeploymentAnnotationPrefix + "/rotate"
ArangoDeploymentPodReplaceAnnotation = ArangoDeploymentAnnotationPrefix + "/replace"
ArangoDeploymentPodDeleteNow = ArangoDeploymentAnnotationPrefix + "/delete_now"
ArangoDeploymentPlanCleanAnnotation = "plan." + ArangoDeploymentAnnotationPrefix + "/clean"
)

View file

@ -59,7 +59,7 @@ const (
// ConditionTypeTerminating indicates that the member is terminating but not yet terminated.
ConditionTypeTerminating ConditionType = "Terminating"
// ConditionTypeTerminating indicates that the deployment is up to date.
// ConditionTypeUpToDate indicates that the deployment is up to date.
ConditionTypeUpToDate ConditionType = "UpToDate"
// ConditionTypeMarkedToRemove indicates that the member is marked to be removed.
ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove"

View file

@ -77,7 +77,7 @@ const (
ActionTypeRotateMember ActionType = "RotateMember"
// ActionTypeRotateStartMember causes a member to be shut down and have its pod removed. It does not wait for the pod to recover.
ActionTypeRotateStartMember ActionType = "RotateStartMember"
// ActionTypeRotateMember causes a member to be restored.
// ActionTypeRotateStopMember causes a member to be restored.
ActionTypeRotateStopMember ActionType = "RotateStopMember"
// ActionTypeUpgradeMember causes a member to be shut down and have its pod removed, restarted with the AutoUpgrade option, waited on until termination, and then restarted again.
ActionTypeUpgradeMember ActionType = "UpgradeMember"
@ -105,7 +105,7 @@ const (
ActionTypeUpdateTLSSNI ActionType = "UpdateTLSSNI"
// ActionTypeSetCurrentImage causes status.CurrentImage to be updated to the image given in the action.
ActionTypeSetCurrentImage ActionType = "SetCurrentImage"
// ActionTypeSetCurrentImage replace image of member to current one.
// ActionTypeSetMemberCurrentImage replace image of member to current one.
ActionTypeSetMemberCurrentImage ActionType = "SetMemberCurrentImage"
// ActionTypeDisableClusterScaling turns off scaling DBservers and coordinators
ActionTypeDisableClusterScaling ActionType = "ScalingDisabled"
@ -115,7 +115,7 @@ const (
ActionTypePVCResize ActionType = "PVCResize"
// ActionTypePVCResized waits for PVC to resize for defined time
ActionTypePVCResized ActionType = "PVCResized"
// UpToDateUpdateResized define up to date annotation in spec
// UpToDateUpdate define up to date annotation in spec
UpToDateUpdate ActionType = "UpToDateUpdate"
// ActionTypeBackupRestore restore plan
ActionTypeBackupRestore ActionType = "BackupRestore"

View file

@ -59,7 +59,7 @@ const (
// ConditionTypeTerminating indicates that the member is terminating but not yet terminated.
ConditionTypeTerminating ConditionType = "Terminating"
// ConditionTypeTerminating indicates that the deployment is up to date.
// ConditionTypeUpToDate indicates that the deployment is up to date.
ConditionTypeUpToDate ConditionType = "UpToDate"
// ConditionTypeMarkedToRemove indicates that the member is marked to be removed.
ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove"

View file

@ -77,7 +77,7 @@ const (
ActionTypeRotateMember ActionType = "RotateMember"
// ActionTypeRotateStartMember causes a member to be shut down and have its pod removed. It does not wait for the pod to recover.
ActionTypeRotateStartMember ActionType = "RotateStartMember"
// ActionTypeRotateMember causes a member to be restored.
// ActionTypeRotateStopMember causes a member to be restored.
ActionTypeRotateStopMember ActionType = "RotateStopMember"
// ActionTypeUpgradeMember causes a member to be shut down and have its pod removed, restarted with the AutoUpgrade option, waited on until termination, and then restarted again.
ActionTypeUpgradeMember ActionType = "UpgradeMember"
@ -105,7 +105,7 @@ const (
ActionTypeUpdateTLSSNI ActionType = "UpdateTLSSNI"
// ActionTypeSetCurrentImage causes status.CurrentImage to be updated to the image given in the action.
ActionTypeSetCurrentImage ActionType = "SetCurrentImage"
// ActionTypeSetCurrentImage replace image of member to current one.
// ActionTypeSetMemberCurrentImage replace image of member to current one.
ActionTypeSetMemberCurrentImage ActionType = "SetMemberCurrentImage"
// ActionTypeDisableClusterScaling turns off scaling DBservers and coordinators
ActionTypeDisableClusterScaling ActionType = "ScalingDisabled"
@ -115,7 +115,7 @@ const (
ActionTypePVCResize ActionType = "PVCResize"
// ActionTypePVCResized waits for PVC to resize for defined time
ActionTypePVCResized ActionType = "PVCResized"
// UpToDateUpdateResized define up to date annotation in spec
// UpToDateUpdate define up to date annotation in spec
UpToDateUpdate ActionType = "UpToDateUpdate"
// ActionTypeBackupRestore restore plan
ActionTypeBackupRestore ActionType = "BackupRestore"

View file

@ -23,9 +23,9 @@ package chaos
import (
"context"
v1 "k8s.io/api/core/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
v1 "k8s.io/api/core/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// Context provides methods to the chaos package.
@ -34,7 +34,7 @@ type Context interface {
GetSpec() api.DeploymentSpec
// DeletePod deletes a pod with given name in the namespace
// of the deployment. If the pod does not exist, the error is ignored.
DeletePod(ctx context.Context, podName string) error
DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error
// GetOwnedPods returns a list of all pods owned by the deployment.
GetOwnedPods(ctx context.Context) ([]v1.Pod, error)
}

View file

@ -25,10 +25,11 @@ import (
"math/rand"
"time"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
)
// Monkey is the service that introduces chaos in the deployment
@ -87,7 +88,7 @@ func (m Monkey) killRandomPod(ctx context.Context) error {
}
p := pods[rand.Intn(len(pods))]
m.log.Info().Str("pod-name", p.GetName()).Msg("Killing pod")
if err := m.context.DeletePod(ctx, p.GetName()); err != nil {
if err := m.context.DeletePod(ctx, p.GetName(), meta.DeleteOptions{}); err != nil {
return errors.WithStack(err)
}
return nil

View file

@ -389,10 +389,10 @@ func (d *Deployment) GetPod(ctx context.Context, podName string) (*core.Pod, err
// DeletePod deletes a pod with given name in the namespace
// of the deployment. If the pod does not exist, the error is ignored.
func (d *Deployment) DeletePod(ctx context.Context, podName string) error {
func (d *Deployment) DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error {
log := d.deps.Log
err := globals.GetGlobalTimeouts().Kubernetes().RunWithTimeout(ctx, func(ctxChild context.Context) error {
return d.PodsModInterface().Delete(ctxChild, podName, meta.DeleteOptions{})
return d.PodsModInterface().Delete(ctxChild, podName, options)
})
if err != nil && !k8sutil.IsNotFound(err) {
log.Debug().Err(err).Str("pod", podName).Msg("Failed to remove pod")

View file

@ -125,3 +125,20 @@ func getActionFactory(t api.ActionType) (actionFactory, bool) {
f, ok := actions[t]
return f, ok
}
// actionSuccess is a stateless action that completes immediately.
type actionSuccess struct{}

// NewActionSuccess creates an action whose Start and CheckProgress
// always report success.
func NewActionSuccess() ActionCore {
	var noop actionSuccess
	return noop
}

// Start reports the action as ready right away and never fails.
func (actionSuccess) Start(context.Context) (bool, error) {
	const ready = true
	return ready, nil
}

// CheckProgress reports ready=true and abort=false and never fails.
func (actionSuccess) CheckProgress(context.Context) (bool, bool, error) {
	const (
		ready = true
		abort = false
	)
	return ready, abort, nil
}

View file

@ -23,8 +23,22 @@ package reconcile
import (
"context"
agencyCache "github.com/arangodb/kube-arangodb/pkg/deployment/agency"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
core "k8s.io/api/core/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/arangodb/arangosync-client/client"
"github.com/arangodb/go-driver"
"github.com/arangodb/go-driver/agency"
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
agencyCache "github.com/arangodb/kube-arangodb/pkg/deployment/agency"
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/arangomember"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/persistentvolumeclaim"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/pod"
@ -33,25 +47,6 @@ import (
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/service"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/serviceaccount"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/servicemonitor"
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
"github.com/arangodb/go-driver/agency"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
core "k8s.io/api/core/v1"
"github.com/arangodb/arangosync-client/client"
driver "github.com/arangodb/go-driver"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
)
// ActionContext provides methods to the Action implementations
@ -103,7 +98,7 @@ type ActionContext interface {
GetPod(ctx context.Context, podName string) (*core.Pod, error)
// DeletePod deletes a pod with given name in the namespace
// of the deployment. If the pod does not exist, the error is ignored.
DeletePod(ctx context.Context, podName string) error
DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error
// DeletePvc deletes a persistent volume claim with given name in the namespace
// of the deployment. If the pvc does not exist, the error is ignored.
DeletePvc(ctx context.Context, pvcName string) error
@ -427,8 +422,8 @@ func (ac *actionContext) GetPod(ctx context.Context, podName string) (*core.Pod,
// DeletePod deletes a pod with given name in the namespace
// of the deployment. If the pod does not exist, the error is ignored.
func (ac *actionContext) DeletePod(ctx context.Context, podName string) error {
if err := ac.context.DeletePod(ctx, podName); err != nil {
func (ac *actionContext) DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error {
if err := ac.context.DeletePod(ctx, podName, options); err != nil {
return errors.WithStack(err)
}
return nil

View file

@ -23,12 +23,13 @@ package reconcile
import (
"context"
"github.com/rs/zerolog"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
"github.com/arangodb/kube-arangodb/pkg/handlers/utils"
"github.com/arangodb/kube-arangodb/pkg/util/constants"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/rs/zerolog"
)
func init() {
@ -66,7 +67,7 @@ func (a *actionKillMemberPod) Start(ctx context.Context) (bool, error) {
return true, nil
}
if err := a.actionCtx.DeletePod(ctx, m.PodName); err != nil {
if err := a.actionCtx.DeletePod(ctx, m.PodName, meta.DeleteOptions{}); err != nil {
log.Error().Err(err).Msg("Unable to kill pod")
return true, nil
}

View file

@ -23,17 +23,15 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/util/globals"
apiErrors "k8s.io/apimachinery/pkg/api/errors"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/arangodb/go-driver"
"github.com/rs/zerolog"
apiErrors "k8s.io/apimachinery/pkg/api/errors"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
driver "github.com/arangodb/go-driver"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/util/arangod"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/arangodb/kube-arangodb/pkg/util/globals"
)
func init() {
@ -108,7 +106,7 @@ func (a *actionRemoveMember) Start(ctx context.Context) (bool, error) {
}
if m.PodName != "" {
// Remove the pod (if any)
if err := a.actionCtx.DeletePod(ctx, m.PodName); err != nil {
if err := a.actionCtx.DeletePod(ctx, m.PodName, meta.DeleteOptions{}); err != nil {
if !apiErrors.IsNotFound(err) {
return false, errors.WithStack(err)
}

View file

@ -23,12 +23,12 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/rs/zerolog"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/rs/zerolog"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
)
func init() {
@ -55,13 +55,12 @@ type actionRotateMember struct {
// Returns true if the action is completely finished, false in case
// the start time needs to be recorded and a ready condition needs to be checked.
func (a *actionRotateMember) Start(ctx context.Context) (bool, error) {
log := a.log
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
shutdown, m, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
if !ok {
log.Error().Msg("No such member")
return true, nil
}
if ready, err := getShutdownHelper(&a.action, a.actionCtx, a.log).Start(ctx); err != nil {
if ready, err := shutdown.Start(ctx); err != nil {
return false, err
} else if ready {
return true, nil
@ -81,20 +80,19 @@ func (a *actionRotateMember) Start(ctx context.Context) (bool, error) {
func (a *actionRotateMember) CheckProgress(ctx context.Context) (bool, bool, error) {
// Check that pod is removed
log := a.log
m, found := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
if !found {
log.Error().Msg("No such member")
shutdown, m, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
if !ok {
return true, false, nil
}
if ready, abort, err := getShutdownHelper(&a.action, a.actionCtx, a.log).CheckProgress(ctx); err != nil {
if ready, abort, err := shutdown.CheckProgress(ctx); err != nil {
return false, abort, err
} else if !ready {
return false, false, nil
}
// Pod is terminated, we can now remove it
if err := a.actionCtx.DeletePod(ctx, m.PodName); err != nil {
if err := a.actionCtx.DeletePod(ctx, m.PodName, meta.DeleteOptions{}); err != nil {
if !k8sutil.IsNotFound(err) {
log.Error().Err(err).Msg("Unable to delete pod")
return false, false, nil

View file

@ -23,12 +23,12 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/rs/zerolog"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/rs/zerolog"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
)
func init() {
@ -55,13 +55,12 @@ type actionRotateStartMember struct {
// Returns true if the action is completely finished, false in case
// the start time needs to be recorded and a ready condition needs to be checked.
func (a *actionRotateStartMember) Start(ctx context.Context) (bool, error) {
log := a.log
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
shutdown, m, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
if !ok {
log.Error().Msg("No such member")
return true, nil
}
if ready, err := getShutdownHelper(&a.action, a.actionCtx, a.log).Start(ctx); err != nil {
if ready, err := shutdown.Start(ctx); err != nil {
return false, err
} else if ready {
return true, nil
@ -81,20 +80,19 @@ func (a *actionRotateStartMember) Start(ctx context.Context) (bool, error) {
func (a *actionRotateStartMember) CheckProgress(ctx context.Context) (bool, bool, error) {
// Check that pod is removed
log := a.log
m, found := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
if !found {
log.Error().Msg("No such member")
shutdown, m, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
if !ok {
return true, false, nil
}
if ready, abort, err := getShutdownHelper(&a.action, a.actionCtx, a.log).CheckProgress(ctx); err != nil {
if ready, abort, err := shutdown.CheckProgress(ctx); err != nil {
return false, abort, err
} else if !ready {
return false, false, nil
}
// Pod is terminated, we can now remove it
if err := a.actionCtx.DeletePod(ctx, m.PodName); err != nil {
if err := a.actionCtx.DeletePod(ctx, m.PodName, meta.DeleteOptions{}); err != nil {
if !k8sutil.IsNotFound(err) {
log.Error().Err(err).Msg("Unable to delete pod")
return false, false, nil

View file

@ -53,14 +53,12 @@ type actionShutdownMember struct {
// Returns true if the action is completely finished, false in case
// the start time needs to be recorded and a ready condition needs to be checked.
func (a *actionShutdownMember) Start(ctx context.Context) (bool, error) {
log := a.log
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
shutdown, m, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
if !ok {
log.Error().Msg("No such member")
return true, nil
}
if ready, err := getShutdownHelper(&a.action, a.actionCtx, a.log).Start(ctx); err != nil {
if ready, err := shutdown.Start(ctx); err != nil {
return false, err
} else if ready {
return true, nil
@ -77,7 +75,12 @@ func (a *actionShutdownMember) Start(ctx context.Context) (bool, error) {
// CheckProgress checks the progress of the action.
// Returns: ready, abort, error.
func (a *actionShutdownMember) CheckProgress(ctx context.Context) (bool, bool, error) {
if ready, abort, err := getShutdownHelper(&a.action, a.actionCtx, a.log).CheckProgress(ctx); err != nil {
shutdown, _, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
if !ok {
return true, false, nil
}
if ready, abort, err := shutdown.CheckProgress(ctx); err != nil {
return false, abort, err
} else {
return ready, false, nil

View file

@ -23,15 +23,15 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
"github.com/arangodb/arangosync-client/client"
driver "github.com/arangodb/go-driver"
"github.com/arangodb/go-driver"
"github.com/arangodb/go-driver/agency"
v1 "k8s.io/api/core/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
"github.com/arangodb/kube-arangodb/pkg/util/arangod/conn"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
)
@ -79,7 +79,7 @@ type Context interface {
GetPod(ctx context.Context, podName string) (*v1.Pod, error)
// DeletePod deletes a pod with given name in the namespace
// of the deployment. If the pod does not exist, the error is ignored.
DeletePod(ctx context.Context, podName string) error
DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error
// DeletePvc deletes a persistent volume claim with given name in the namespace
// of the deployment. If the pvc does not exist, the error is ignored.
DeletePvc(ctx context.Context, pvcName string) error

View file

@ -23,35 +23,62 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/util/globals"
"github.com/arangodb/kube-arangodb/pkg/apis/deployment"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
"github.com/rs/zerolog"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
"github.com/arangodb/kube-arangodb/pkg/util"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/arangodb/kube-arangodb/pkg/util/globals"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
"github.com/rs/zerolog"
)
func getShutdownHelper(a *api.Action, ctx ActionContext, log zerolog.Logger) ActionCore {
if features.GracefulShutdown().Enabled() {
return shutdownHelperAPI{action: a, actionCtx: ctx, log: log}
// getShutdownHelper returns an action to shut down a pod according to the settings.
// Returns true when member status exists.
// There are 3 possibilities to shut down the pod: immediately, gracefully, standard kubernetes delete API.
// When the pod does not exist, a success action (one that always succeeds) is returned.
func getShutdownHelper(a *api.Action, actionCtx ActionContext, log zerolog.Logger) (ActionCore, api.MemberStatus, bool) {
m, ok := actionCtx.GetMemberStatusByID(a.MemberID)
if !ok {
log.Warn().Str("pod-name", m.PodName).Msg("member is already gone")
return nil, api.MemberStatus{}, false
}
serverGroup := ctx.GetSpec().GetServerGroupSpec(a.Group)
pod, ok := actionCtx.GetCachedStatus().Pod(m.PodName)
if !ok {
log.Warn().Str("pod-name", m.PodName).Msg("pod is already gone")
// Pod does not exist, so create success action to finish it immediately.
return NewActionSuccess(), m, true
}
if _, ok := pod.GetAnnotations()[deployment.ArangoDeploymentPodDeleteNow]; ok {
// The pod contains annotation, so pod must be deleted immediately.
return shutdownNow{action: a, actionCtx: actionCtx, log: log, memberStatus: m}, m, true
}
if features.GracefulShutdown().Enabled() {
return shutdownHelperAPI{action: a, actionCtx: actionCtx, log: log, memberStatus: m}, m, true
}
serverGroup := actionCtx.GetSpec().GetServerGroupSpec(a.Group)
switch serverGroup.ShutdownMethod.Get() {
case api.ServerGroupShutdownMethodDelete:
return shutdownHelperDelete{action: a, actionCtx: ctx, log: log}
return shutdownHelperDelete{action: a, actionCtx: actionCtx, log: log, memberStatus: m}, m, true
default:
return shutdownHelperAPI{action: a, actionCtx: ctx, log: log}
return shutdownHelperAPI{action: a, actionCtx: actionCtx, log: log, memberStatus: m}, m, true
}
}
type shutdownHelperAPI struct {
log zerolog.Logger
action *api.Action
actionCtx ActionContext
log zerolog.Logger
action *api.Action
actionCtx ActionContext
memberStatus api.MemberStatus
}
func (s shutdownHelperAPI) Start(ctx context.Context) (bool, error) {
@ -60,18 +87,14 @@ func (s shutdownHelperAPI) Start(ctx context.Context) (bool, error) {
log.Info().Msgf("Using API to shutdown member")
group := s.action.Group
m, ok := s.actionCtx.GetMemberStatusByID(s.action.MemberID)
if !ok {
log.Error().Msg("No such member")
return true, nil
}
if m.PodName == "" {
podName := s.memberStatus.PodName
if podName == "" {
log.Warn().Msgf("Pod is empty")
return true, nil
}
// Remove finalizers, so Kubernetes will quickly terminate the pod
if !features.GracefulShutdown().Enabled() {
if err := s.actionCtx.RemovePodFinalizers(ctx, m.PodName); err != nil {
if err := s.actionCtx.RemovePodFinalizers(ctx, podName); err != nil {
return false, errors.WithStack(err)
}
}
@ -100,7 +123,7 @@ func (s shutdownHelperAPI) Start(ctx context.Context) (bool, error) {
}
} else if group.IsArangosync() {
// Terminate pod
if err := s.actionCtx.DeletePod(ctx, m.PodName); err != nil {
if err := s.actionCtx.DeletePod(ctx, podName, meta.DeleteOptions{}); err != nil {
return false, errors.WithStack(err)
}
}
@ -108,26 +131,17 @@ func (s shutdownHelperAPI) Start(ctx context.Context) (bool, error) {
return false, nil
}
func (s shutdownHelperAPI) CheckProgress(ctx context.Context) (bool, bool, error) {
// Check that pod is removed
log := s.log
m, found := s.actionCtx.GetMemberStatusByID(s.action.MemberID)
if !found {
log.Error().Msg("No such member")
return true, false, nil
}
if !m.Conditions.IsTrue(api.ConditionTypeTerminated) {
// Pod is not yet terminated
return false, false, nil
}
return true, false, nil
// CheckProgress returns true when pod is terminated.
func (s shutdownHelperAPI) CheckProgress(_ context.Context) (bool, bool, error) {
terminated := s.memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated)
return terminated, false, nil
}
type shutdownHelperDelete struct {
log zerolog.Logger
action *api.Action
actionCtx ActionContext
log zerolog.Logger
action *api.Action
actionCtx ActionContext
memberStatus api.MemberStatus
}
func (s shutdownHelperDelete) Start(ctx context.Context) (bool, error) {
@ -135,23 +149,17 @@ func (s shutdownHelperDelete) Start(ctx context.Context) (bool, error) {
log.Info().Msgf("Using Pod Delete to shutdown member")
m, ok := s.actionCtx.GetMemberStatusByID(s.action.MemberID)
if !ok {
log.Error().Msg("No such member")
return true, nil
}
if m.PodName == "" {
podName := s.memberStatus.PodName
if podName == "" {
log.Warn().Msgf("Pod is empty")
return true, nil
}
// Terminate pod
if err := s.actionCtx.DeletePod(ctx, m.PodName); err != nil {
if err := s.actionCtx.DeletePod(ctx, podName, meta.DeleteOptions{}); err != nil {
if !k8sutil.IsNotFound(err) {
return false, errors.WithStack(err)
}
}
return false, nil
@ -160,20 +168,15 @@ func (s shutdownHelperDelete) Start(ctx context.Context) (bool, error) {
func (s shutdownHelperDelete) CheckProgress(ctx context.Context) (bool, bool, error) {
// Check that pod is removed
log := s.log
m, found := s.actionCtx.GetMemberStatusByID(s.action.MemberID)
if !found {
log.Error().Msg("No such member")
return true, false, nil
}
if !m.Conditions.IsTrue(api.ConditionTypeTerminated) {
if !s.memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) {
// Pod is not yet terminated
log.Warn().Msgf("Pod not yet terminated")
return false, false, nil
}
if m.PodName != "" {
if _, err := s.actionCtx.GetPod(ctx, m.PodName); err == nil {
podName := s.memberStatus.PodName
if podName != "" {
if _, err := s.actionCtx.GetPod(ctx, podName); err == nil {
log.Warn().Msgf("Pod still exists")
return false, false, nil
} else if !k8sutil.IsNotFound(err) {
@ -184,3 +187,58 @@ func (s shutdownHelperDelete) CheckProgress(ctx context.Context) (bool, bool, er
return true, false, nil
}
// shutdownNow is the shutdown helper that removes a member's pod forcefully.
// getShutdownHelper selects it when the pod carries the
// deployment.ArangoDeploymentPodDeleteNow annotation.
type shutdownNow struct {
// action is the action this helper was created for.
action *api.Action
// actionCtx is used to fetch the pod, remove its finalizers and delete it.
actionCtx ActionContext
// memberStatus is a copy of the member status taken when the helper was created;
// its PodName and PodUID identify the pod to remove.
memberStatus api.MemberStatus
// log receives the progress messages of this helper.
log zerolog.Logger
}
// Start begins the forced removal of the member's pod.
// It delegates to CheckProgress, because a removal can begin gracefully
// and later be switched to a forced shutdown, so both entry points have to
// perform the same steps. The abort flag reported by CheckProgress is dropped.
// Returns the ready flag and error from CheckProgress.
func (s shutdownNow) Start(ctx context.Context) (bool, error) {
	s.log.Info().Msg("Using shutdown now method")

	done, _, err := s.CheckProgress(ctx)
	return done, err
}
// CheckProgress forcefully removes the member's pod and reports whether the
// shutdown is finished.
// Returns: ready, abort, error. The abort flag is always false.
//
// The action is ready when the pod is not found, when the existing pod has a
// different UID than the one recorded in the member status, or once the
// finalizers were removed and the delete request was accepted.
func (s shutdownNow) CheckProgress(ctx context.Context) (bool, bool, error) {
	name := s.memberStatus.PodName

	current, getErr := s.actionCtx.GetPod(ctx, name)
	switch {
	case getErr == nil:
		// The pod still exists; continue with the forced removal below.
	case k8sutil.IsNotFound(getErr):
		s.log.Info().Msg("Using shutdown now method completed because pod is gone")
		return true, false, nil
	default:
		return false, false, errors.Wrapf(getErr, "failed to get pod")
	}

	if current.GetUID() != s.memberStatus.PodUID {
		// A different pod already runs under this name, so the old one is gone.
		s.log.Info().Msg("Using shutdown now method completed because it is already rotated")
		return true, false, nil
	}

	// Drop the finalizers first, so they do not hold up the deletion.
	if err := s.actionCtx.RemovePodFinalizers(ctx, name); err != nil {
		return false, false, errors.WithStack(err)
	}

	// Delete the pod, leaving one second to clean a PVC.
	gracePeriod := util.NewInt64(1)
	err := s.actionCtx.DeletePod(ctx, name, meta.DeleteOptions{GracePeriodSeconds: gracePeriod})
	if err != nil && !k8sutil.IsNotFound(err) {
		return false, false, errors.WithStack(err)
	}

	s.log.Info().Msgf("Using shutdown now method completed")
	return true, false, nil
}

View file

@ -305,7 +305,7 @@ func (c *testContext) CreateMember(_ context.Context, group api.ServerGroup, id
panic("implement me")
}
func (c *testContext) DeletePod(_ context.Context, podName string) error {
func (c *testContext) DeletePod(_ context.Context, _ string, _ meta.DeleteOptions) error {
panic("implement me")
}

View file

@ -24,6 +24,7 @@ import (
"context"
"github.com/rs/zerolog"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
)
@ -53,7 +54,7 @@ func (r *Reconciler) CheckDeployment(ctx context.Context) error {
if status.Members.Coordinators.AllFailed() {
r.log.Error().Msg("All coordinators failed - reset")
for _, m := range status.Members.Coordinators {
if err := r.context.DeletePod(ctx, m.PodName); err != nil {
if err := r.context.DeletePod(ctx, m.PodName, meta.DeleteOptions{}); err != nil {
r.log.Error().Err(err).Msg("Failed to delete pod")
}
m.Phase = api.MemberPhaseNone

View file

@ -23,10 +23,18 @@ package resources
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/deployment/patch"
"github.com/arangodb/go-driver"
"github.com/arangodb/go-driver/agency"
core "k8s.io/api/core/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
agencyCache "github.com/arangodb/kube-arangodb/pkg/deployment/agency"
"github.com/arangodb/kube-arangodb/pkg/deployment/patch"
"github.com/arangodb/kube-arangodb/pkg/operator/scope"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/arangomember"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/persistentvolumeclaim"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/pod"
@ -35,17 +43,6 @@ import (
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/service"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/serviceaccount"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/servicemonitor"
"github.com/arangodb/kube-arangodb/pkg/operator/scope"
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
driver "github.com/arangodb/go-driver"
"github.com/arangodb/go-driver/agency"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
core "k8s.io/api/core/v1"
)
// ServerGroupIterator provides a helper to callback on every server
@ -196,7 +193,7 @@ type Context interface {
CleanupPod(ctx context.Context, p *core.Pod) error
// DeletePod deletes a pod with given name in the namespace
// of the deployment. If the pod does not exist, the error is ignored.
DeletePod(ctx context.Context, podName string) error
DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error
// DeletePvc deletes a persistent volume claim with given name in the namespace
// of the deployment. If the pvc does not exist, the error is ignored.
DeletePvc(ctx context.Context, pvcName string) error

View file

@ -151,7 +151,7 @@ func (r *Resources) cleanupRemovedClusterMembers(ctx context.Context) error {
for _, podName := range podNamesToRemove {
log.Info().Str("pod", podName).Msg("Removing obsolete member pod")
if err := r.context.DeletePod(ctx, podName); err != nil && !k8sutil.IsNotFound(err) {
if err := r.context.DeletePod(ctx, podName, metav1.DeleteOptions{}); err != nil && !k8sutil.IsNotFound(err) {
log.Warn().Err(err).Str("pod", podName).Msg("Failed to remove obsolete pod")
}
}