mirror of
https://github.com/arangodb/kube-arangodb.git
synced 2024-12-14 11:57:37 +00:00
[Feature] [TG-165] Kill pod forcefully (#896)
This commit is contained in:
parent
b296b837d4
commit
c8a0920c16
22 changed files with 227 additions and 158 deletions
|
@ -4,6 +4,7 @@
|
|||
- Do not check License V2 on Community images
|
||||
- Add status.members.<group>.
|
||||
- Define MemberReplacementRequired condition
|
||||
- Remove pod immediately when annotation is turned on
|
||||
|
||||
## [1.2.7](https://github.com/arangodb/kube-arangodb/tree/1.2.7) (2022-01-17)
|
||||
- Add Plan BackOff functionality
|
||||
|
|
|
@ -25,5 +25,6 @@ const (
|
|||
ArangoDeploymentPodMaintenanceAnnotation = ArangoDeploymentAnnotationPrefix + "/maintenance"
|
||||
ArangoDeploymentPodRotateAnnotation = ArangoDeploymentAnnotationPrefix + "/rotate"
|
||||
ArangoDeploymentPodReplaceAnnotation = ArangoDeploymentAnnotationPrefix + "/replace"
|
||||
ArangoDeploymentPodDeleteNow = ArangoDeploymentAnnotationPrefix + "/delete_now"
|
||||
ArangoDeploymentPlanCleanAnnotation = "plan." + ArangoDeploymentAnnotationPrefix + "/clean"
|
||||
)
|
||||
|
|
|
@ -59,7 +59,7 @@ const (
|
|||
|
||||
// ConditionTypeTerminating indicates that the member is terminating but not yet terminated.
|
||||
ConditionTypeTerminating ConditionType = "Terminating"
|
||||
// ConditionTypeTerminating indicates that the deployment is up to date.
|
||||
// ConditionTypeUpToDate indicates that the deployment is up to date.
|
||||
ConditionTypeUpToDate ConditionType = "UpToDate"
|
||||
// ConditionTypeMarkedToRemove indicates that the member is marked to be removed.
|
||||
ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove"
|
||||
|
|
|
@ -77,7 +77,7 @@ const (
|
|||
ActionTypeRotateMember ActionType = "RotateMember"
|
||||
// ActionTypeRotateStartMember causes a member to be shut down and have its pod removed. It does not wait for the pod to recover.
|
||||
ActionTypeRotateStartMember ActionType = "RotateStartMember"
|
||||
// ActionTypeRotateMember causes a member to be restored.
|
||||
// ActionTypeRotateStopMember causes a member to be restored.
|
||||
ActionTypeRotateStopMember ActionType = "RotateStopMember"
|
||||
// ActionTypeUpgradeMember causes a member to be shut down and have its pod removed, restarted with the AutoUpgrade option, waited on until termination, and then restarted again.
|
||||
ActionTypeUpgradeMember ActionType = "UpgradeMember"
|
||||
|
@ -105,7 +105,7 @@ const (
|
|||
ActionTypeUpdateTLSSNI ActionType = "UpdateTLSSNI"
|
||||
// ActionTypeSetCurrentImage causes status.CurrentImage to be updated to the image given in the action.
|
||||
ActionTypeSetCurrentImage ActionType = "SetCurrentImage"
|
||||
// ActionTypeSetCurrentImage replace image of member to current one.
|
||||
// ActionTypeSetMemberCurrentImage replaces the image of a member with the current one.
|
||||
ActionTypeSetMemberCurrentImage ActionType = "SetMemberCurrentImage"
|
||||
// ActionTypeDisableClusterScaling turns off scaling DBservers and coordinators
|
||||
ActionTypeDisableClusterScaling ActionType = "ScalingDisabled"
|
||||
|
@ -115,7 +115,7 @@ const (
|
|||
ActionTypePVCResize ActionType = "PVCResize"
|
||||
// ActionTypePVCResized waits for PVC to resize for defined time
|
||||
ActionTypePVCResized ActionType = "PVCResized"
|
||||
// UpToDateUpdateResized define up to date annotation in spec
|
||||
// UpToDateUpdate defines the up-to-date annotation in the spec.
|
||||
UpToDateUpdate ActionType = "UpToDateUpdate"
|
||||
// ActionTypeBackupRestore restore plan
|
||||
ActionTypeBackupRestore ActionType = "BackupRestore"
|
||||
|
|
|
@ -59,7 +59,7 @@ const (
|
|||
|
||||
// ConditionTypeTerminating indicates that the member is terminating but not yet terminated.
|
||||
ConditionTypeTerminating ConditionType = "Terminating"
|
||||
// ConditionTypeTerminating indicates that the deployment is up to date.
|
||||
// ConditionTypeUpToDate indicates that the deployment is up to date.
|
||||
ConditionTypeUpToDate ConditionType = "UpToDate"
|
||||
// ConditionTypeMarkedToRemove indicates that the member is marked to be removed.
|
||||
ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove"
|
||||
|
|
|
@ -77,7 +77,7 @@ const (
|
|||
ActionTypeRotateMember ActionType = "RotateMember"
|
||||
// ActionTypeRotateStartMember causes a member to be shut down and have its pod removed. It does not wait for the pod to recover.
|
||||
ActionTypeRotateStartMember ActionType = "RotateStartMember"
|
||||
// ActionTypeRotateMember causes a member to be restored.
|
||||
// ActionTypeRotateStopMember causes a member to be restored.
|
||||
ActionTypeRotateStopMember ActionType = "RotateStopMember"
|
||||
// ActionTypeUpgradeMember causes a member to be shut down and have its pod removed, restarted with the AutoUpgrade option, waited on until termination, and then restarted again.
|
||||
ActionTypeUpgradeMember ActionType = "UpgradeMember"
|
||||
|
@ -105,7 +105,7 @@ const (
|
|||
ActionTypeUpdateTLSSNI ActionType = "UpdateTLSSNI"
|
||||
// ActionTypeSetCurrentImage causes status.CurrentImage to be updated to the image given in the action.
|
||||
ActionTypeSetCurrentImage ActionType = "SetCurrentImage"
|
||||
// ActionTypeSetCurrentImage replace image of member to current one.
|
||||
// ActionTypeSetMemberCurrentImage replaces the image of a member with the current one.
|
||||
ActionTypeSetMemberCurrentImage ActionType = "SetMemberCurrentImage"
|
||||
// ActionTypeDisableClusterScaling turns off scaling DBservers and coordinators
|
||||
ActionTypeDisableClusterScaling ActionType = "ScalingDisabled"
|
||||
|
@ -115,7 +115,7 @@ const (
|
|||
ActionTypePVCResize ActionType = "PVCResize"
|
||||
// ActionTypePVCResized waits for PVC to resize for defined time
|
||||
ActionTypePVCResized ActionType = "PVCResized"
|
||||
// UpToDateUpdateResized define up to date annotation in spec
|
||||
// UpToDateUpdate defines the up-to-date annotation in the spec.
|
||||
UpToDateUpdate ActionType = "UpToDateUpdate"
|
||||
// ActionTypeBackupRestore restore plan
|
||||
ActionTypeBackupRestore ActionType = "BackupRestore"
|
||||
|
|
|
@ -23,9 +23,9 @@ package chaos
|
|||
import (
|
||||
"context"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
// Context provides methods to the chaos package.
|
||||
|
@ -34,7 +34,7 @@ type Context interface {
|
|||
GetSpec() api.DeploymentSpec
|
||||
// DeletePod deletes a pod with given name in the namespace
|
||||
// of the deployment. If the pod does not exist, the error is ignored.
|
||||
DeletePod(ctx context.Context, podName string) error
|
||||
DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error
|
||||
// GetOwnedPods returns a list of all pods owned by the deployment.
|
||||
GetOwnedPods(ctx context.Context) ([]v1.Pod, error)
|
||||
}
|
||||
|
|
|
@ -25,10 +25,11 @@ import (
|
|||
"math/rand"
|
||||
"time"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
)
|
||||
|
||||
// Monkey is the service that introduces chaos in the deployment
|
||||
|
@ -87,7 +88,7 @@ func (m Monkey) killRandomPod(ctx context.Context) error {
|
|||
}
|
||||
p := pods[rand.Intn(len(pods))]
|
||||
m.log.Info().Str("pod-name", p.GetName()).Msg("Killing pod")
|
||||
if err := m.context.DeletePod(ctx, p.GetName()); err != nil {
|
||||
if err := m.context.DeletePod(ctx, p.GetName(), meta.DeleteOptions{}); err != nil {
|
||||
return errors.WithStack(err)
|
||||
}
|
||||
return nil
|
||||
|
|
|
@ -389,10 +389,10 @@ func (d *Deployment) GetPod(ctx context.Context, podName string) (*core.Pod, err
|
|||
|
||||
// DeletePod deletes a pod with given name in the namespace
|
||||
// of the deployment. If the pod does not exist, the error is ignored.
|
||||
func (d *Deployment) DeletePod(ctx context.Context, podName string) error {
|
||||
func (d *Deployment) DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error {
|
||||
log := d.deps.Log
|
||||
err := globals.GetGlobalTimeouts().Kubernetes().RunWithTimeout(ctx, func(ctxChild context.Context) error {
|
||||
return d.PodsModInterface().Delete(ctxChild, podName, meta.DeleteOptions{})
|
||||
return d.PodsModInterface().Delete(ctxChild, podName, options)
|
||||
})
|
||||
if err != nil && !k8sutil.IsNotFound(err) {
|
||||
log.Debug().Err(err).Str("pod", podName).Msg("Failed to remove pod")
|
||||
|
|
|
@ -125,3 +125,20 @@ func getActionFactory(t api.ActionType) (actionFactory, bool) {
|
|||
f, ok := actions[t]
|
||||
return f, ok
|
||||
}
|
||||
|
||||
type actionSuccess struct{}
|
||||
|
||||
// NewActionSuccess returns action which always returns success.
|
||||
func NewActionSuccess() ActionCore {
|
||||
return actionSuccess{}
|
||||
}
|
||||
|
||||
// Start always returns true.
|
||||
func (actionSuccess) Start(_ context.Context) (bool, error) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// CheckProgress always returns true.
|
||||
func (actionSuccess) CheckProgress(_ context.Context) (bool, bool, error) {
|
||||
return true, false, nil
|
||||
}
|
||||
|
|
|
@ -23,8 +23,22 @@ package reconcile
|
|||
import (
|
||||
"context"
|
||||
|
||||
agencyCache "github.com/arangodb/kube-arangodb/pkg/deployment/agency"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
core "k8s.io/api/core/v1"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
"github.com/arangodb/arangosync-client/client"
|
||||
"github.com/arangodb/go-driver"
|
||||
"github.com/arangodb/go-driver/agency"
|
||||
|
||||
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
agencyCache "github.com/arangodb/kube-arangodb/pkg/deployment/agency"
|
||||
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/arangomember"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/persistentvolumeclaim"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/pod"
|
||||
|
@ -33,25 +47,6 @@ import (
|
|||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/service"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/serviceaccount"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/servicemonitor"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
|
||||
|
||||
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
|
||||
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
|
||||
|
||||
"github.com/arangodb/go-driver/agency"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
core "k8s.io/api/core/v1"
|
||||
|
||||
"github.com/arangodb/arangosync-client/client"
|
||||
driver "github.com/arangodb/go-driver"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
)
|
||||
|
||||
// ActionContext provides methods to the Action implementations
|
||||
|
@ -103,7 +98,7 @@ type ActionContext interface {
|
|||
GetPod(ctx context.Context, podName string) (*core.Pod, error)
|
||||
// DeletePod deletes a pod with given name in the namespace
|
||||
// of the deployment. If the pod does not exist, the error is ignored.
|
||||
DeletePod(ctx context.Context, podName string) error
|
||||
DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error
|
||||
// DeletePvc deletes a persistent volume claim with given name in the namespace
|
||||
// of the deployment. If the pvc does not exist, the error is ignored.
|
||||
DeletePvc(ctx context.Context, pvcName string) error
|
||||
|
@ -427,8 +422,8 @@ func (ac *actionContext) GetPod(ctx context.Context, podName string) (*core.Pod,
|
|||
|
||||
// DeletePod deletes a pod with given name in the namespace
|
||||
// of the deployment. If the pod does not exist, the error is ignored.
|
||||
func (ac *actionContext) DeletePod(ctx context.Context, podName string) error {
|
||||
if err := ac.context.DeletePod(ctx, podName); err != nil {
|
||||
func (ac *actionContext) DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error {
|
||||
if err := ac.context.DeletePod(ctx, podName, options); err != nil {
|
||||
return errors.WithStack(err)
|
||||
}
|
||||
return nil
|
||||
|
|
|
@ -23,12 +23,13 @@ package reconcile
|
|||
import (
|
||||
"context"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
|
||||
"github.com/arangodb/kube-arangodb/pkg/handlers/utils"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/constants"
|
||||
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -66,7 +67,7 @@ func (a *actionKillMemberPod) Start(ctx context.Context) (bool, error) {
|
|||
return true, nil
|
||||
}
|
||||
|
||||
if err := a.actionCtx.DeletePod(ctx, m.PodName); err != nil {
|
||||
if err := a.actionCtx.DeletePod(ctx, m.PodName, meta.DeleteOptions{}); err != nil {
|
||||
log.Error().Err(err).Msg("Unable to kill pod")
|
||||
return true, nil
|
||||
}
|
||||
|
|
|
@ -23,17 +23,15 @@ package reconcile
|
|||
import (
|
||||
"context"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/globals"
|
||||
|
||||
apiErrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
|
||||
"github.com/arangodb/go-driver"
|
||||
"github.com/rs/zerolog"
|
||||
apiErrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
driver "github.com/arangodb/go-driver"
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/arangod"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/globals"
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -108,7 +106,7 @@ func (a *actionRemoveMember) Start(ctx context.Context) (bool, error) {
|
|||
}
|
||||
if m.PodName != "" {
|
||||
// Remove the pod (if any)
|
||||
if err := a.actionCtx.DeletePod(ctx, m.PodName); err != nil {
|
||||
if err := a.actionCtx.DeletePod(ctx, m.PodName, meta.DeleteOptions{}); err != nil {
|
||||
if !apiErrors.IsNotFound(err) {
|
||||
return false, errors.WithStack(err)
|
||||
}
|
||||
|
|
|
@ -23,12 +23,12 @@ package reconcile
|
|||
import (
|
||||
"context"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
"github.com/rs/zerolog"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -55,13 +55,12 @@ type actionRotateMember struct {
|
|||
// Returns true if the action is completely finished, false in case
|
||||
// the start time needs to be recorded and a ready condition needs to be checked.
|
||||
func (a *actionRotateMember) Start(ctx context.Context) (bool, error) {
|
||||
log := a.log
|
||||
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
|
||||
shutdown, m, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
|
||||
if !ok {
|
||||
log.Error().Msg("No such member")
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if ready, err := getShutdownHelper(&a.action, a.actionCtx, a.log).Start(ctx); err != nil {
|
||||
if ready, err := shutdown.Start(ctx); err != nil {
|
||||
return false, err
|
||||
} else if ready {
|
||||
return true, nil
|
||||
|
@ -81,20 +80,19 @@ func (a *actionRotateMember) Start(ctx context.Context) (bool, error) {
|
|||
func (a *actionRotateMember) CheckProgress(ctx context.Context) (bool, bool, error) {
|
||||
// Check that pod is removed
|
||||
log := a.log
|
||||
m, found := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
|
||||
if !found {
|
||||
log.Error().Msg("No such member")
|
||||
shutdown, m, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
|
||||
if !ok {
|
||||
return true, false, nil
|
||||
}
|
||||
|
||||
if ready, abort, err := getShutdownHelper(&a.action, a.actionCtx, a.log).CheckProgress(ctx); err != nil {
|
||||
if ready, abort, err := shutdown.CheckProgress(ctx); err != nil {
|
||||
return false, abort, err
|
||||
} else if !ready {
|
||||
return false, false, nil
|
||||
}
|
||||
|
||||
// Pod is terminated, we can now remove it
|
||||
if err := a.actionCtx.DeletePod(ctx, m.PodName); err != nil {
|
||||
if err := a.actionCtx.DeletePod(ctx, m.PodName, meta.DeleteOptions{}); err != nil {
|
||||
if !k8sutil.IsNotFound(err) {
|
||||
log.Error().Err(err).Msg("Unable to delete pod")
|
||||
return false, false, nil
|
||||
|
|
|
@ -23,12 +23,12 @@ package reconcile
|
|||
import (
|
||||
"context"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
"github.com/rs/zerolog"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -55,13 +55,12 @@ type actionRotateStartMember struct {
|
|||
// Returns true if the action is completely finished, false in case
|
||||
// the start time needs to be recorded and a ready condition needs to be checked.
|
||||
func (a *actionRotateStartMember) Start(ctx context.Context) (bool, error) {
|
||||
log := a.log
|
||||
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
|
||||
shutdown, m, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
|
||||
if !ok {
|
||||
log.Error().Msg("No such member")
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if ready, err := getShutdownHelper(&a.action, a.actionCtx, a.log).Start(ctx); err != nil {
|
||||
if ready, err := shutdown.Start(ctx); err != nil {
|
||||
return false, err
|
||||
} else if ready {
|
||||
return true, nil
|
||||
|
@ -81,20 +80,19 @@ func (a *actionRotateStartMember) Start(ctx context.Context) (bool, error) {
|
|||
func (a *actionRotateStartMember) CheckProgress(ctx context.Context) (bool, bool, error) {
|
||||
// Check that pod is removed
|
||||
log := a.log
|
||||
m, found := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
|
||||
if !found {
|
||||
log.Error().Msg("No such member")
|
||||
shutdown, m, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
|
||||
if !ok {
|
||||
return true, false, nil
|
||||
}
|
||||
|
||||
if ready, abort, err := getShutdownHelper(&a.action, a.actionCtx, a.log).CheckProgress(ctx); err != nil {
|
||||
if ready, abort, err := shutdown.CheckProgress(ctx); err != nil {
|
||||
return false, abort, err
|
||||
} else if !ready {
|
||||
return false, false, nil
|
||||
}
|
||||
|
||||
// Pod is terminated, we can now remove it
|
||||
if err := a.actionCtx.DeletePod(ctx, m.PodName); err != nil {
|
||||
if err := a.actionCtx.DeletePod(ctx, m.PodName, meta.DeleteOptions{}); err != nil {
|
||||
if !k8sutil.IsNotFound(err) {
|
||||
log.Error().Err(err).Msg("Unable to delete pod")
|
||||
return false, false, nil
|
||||
|
|
|
@ -53,14 +53,12 @@ type actionShutdownMember struct {
|
|||
// Returns true if the action is completely finished, false in case
|
||||
// the start time needs to be recorded and a ready condition needs to be checked.
|
||||
func (a *actionShutdownMember) Start(ctx context.Context) (bool, error) {
|
||||
log := a.log
|
||||
m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID)
|
||||
shutdown, m, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
|
||||
if !ok {
|
||||
log.Error().Msg("No such member")
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if ready, err := getShutdownHelper(&a.action, a.actionCtx, a.log).Start(ctx); err != nil {
|
||||
if ready, err := shutdown.Start(ctx); err != nil {
|
||||
return false, err
|
||||
} else if ready {
|
||||
return true, nil
|
||||
|
@ -77,7 +75,12 @@ func (a *actionShutdownMember) Start(ctx context.Context) (bool, error) {
|
|||
// CheckProgress checks the progress of the action.
|
||||
// Returns: ready, abort, error.
|
||||
func (a *actionShutdownMember) CheckProgress(ctx context.Context) (bool, bool, error) {
|
||||
if ready, abort, err := getShutdownHelper(&a.action, a.actionCtx, a.log).CheckProgress(ctx); err != nil {
|
||||
shutdown, _, ok := getShutdownHelper(&a.action, a.actionCtx, a.log)
|
||||
if !ok {
|
||||
return true, false, nil
|
||||
}
|
||||
|
||||
if ready, abort, err := shutdown.CheckProgress(ctx); err != nil {
|
||||
return false, abort, err
|
||||
} else {
|
||||
return ready, false, nil
|
||||
|
|
|
@ -23,15 +23,15 @@ package reconcile
|
|||
import (
|
||||
"context"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
|
||||
|
||||
"github.com/arangodb/arangosync-client/client"
|
||||
driver "github.com/arangodb/go-driver"
|
||||
"github.com/arangodb/go-driver"
|
||||
"github.com/arangodb/go-driver/agency"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/arangod/conn"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
)
|
||||
|
@ -79,7 +79,7 @@ type Context interface {
|
|||
GetPod(ctx context.Context, podName string) (*v1.Pod, error)
|
||||
// DeletePod deletes a pod with given name in the namespace
|
||||
// of the deployment. If the pod does not exist, the error is ignored.
|
||||
DeletePod(ctx context.Context, podName string) error
|
||||
DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error
|
||||
// DeletePvc deletes a persistent volume claim with given name in the namespace
|
||||
// of the deployment. If the pvc does not exist, the error is ignored.
|
||||
DeletePvc(ctx context.Context, pvcName string) error
|
||||
|
|
|
@ -23,35 +23,62 @@ package reconcile
|
|||
import (
|
||||
"context"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/globals"
|
||||
"github.com/arangodb/kube-arangodb/pkg/apis/deployment"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
|
||||
"github.com/rs/zerolog"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/errors"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/globals"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
func getShutdownHelper(a *api.Action, ctx ActionContext, log zerolog.Logger) ActionCore {
|
||||
if features.GracefulShutdown().Enabled() {
|
||||
return shutdownHelperAPI{action: a, actionCtx: ctx, log: log}
|
||||
// getShutdownHelper returns an action to shut down a pod according to the settings.
|
||||
// Returns true when member status exists.
|
||||
// There are 3 possibilities to shut down the pod: immediately, gracefully, standard kubernetes delete API.
|
||||
// When the pod does not exist, a success action (which always succeeds) is returned.
|
||||
func getShutdownHelper(a *api.Action, actionCtx ActionContext, log zerolog.Logger) (ActionCore, api.MemberStatus, bool) {
|
||||
m, ok := actionCtx.GetMemberStatusByID(a.MemberID)
|
||||
if !ok {
|
||||
log.Warn().Str("pod-name", m.PodName).Msg("member is already gone")
|
||||
|
||||
return nil, api.MemberStatus{}, false
|
||||
}
|
||||
|
||||
serverGroup := ctx.GetSpec().GetServerGroupSpec(a.Group)
|
||||
pod, ok := actionCtx.GetCachedStatus().Pod(m.PodName)
|
||||
if !ok {
|
||||
log.Warn().Str("pod-name", m.PodName).Msg("pod is already gone")
|
||||
// Pod does not exist, so create success action to finish it immediately.
|
||||
return NewActionSuccess(), m, true
|
||||
}
|
||||
|
||||
if _, ok := pod.GetAnnotations()[deployment.ArangoDeploymentPodDeleteNow]; ok {
|
||||
// The pod contains annotation, so pod must be deleted immediately.
|
||||
return shutdownNow{action: a, actionCtx: actionCtx, log: log, memberStatus: m}, m, true
|
||||
}
|
||||
|
||||
if features.GracefulShutdown().Enabled() {
|
||||
return shutdownHelperAPI{action: a, actionCtx: actionCtx, log: log, memberStatus: m}, m, true
|
||||
}
|
||||
|
||||
serverGroup := actionCtx.GetSpec().GetServerGroupSpec(a.Group)
|
||||
|
||||
switch serverGroup.ShutdownMethod.Get() {
|
||||
case api.ServerGroupShutdownMethodDelete:
|
||||
return shutdownHelperDelete{action: a, actionCtx: ctx, log: log}
|
||||
return shutdownHelperDelete{action: a, actionCtx: actionCtx, log: log, memberStatus: m}, m, true
|
||||
default:
|
||||
return shutdownHelperAPI{action: a, actionCtx: ctx, log: log}
|
||||
return shutdownHelperAPI{action: a, actionCtx: actionCtx, log: log, memberStatus: m}, m, true
|
||||
}
|
||||
}
|
||||
|
||||
type shutdownHelperAPI struct {
|
||||
log zerolog.Logger
|
||||
action *api.Action
|
||||
actionCtx ActionContext
|
||||
log zerolog.Logger
|
||||
action *api.Action
|
||||
actionCtx ActionContext
|
||||
memberStatus api.MemberStatus
|
||||
}
|
||||
|
||||
func (s shutdownHelperAPI) Start(ctx context.Context) (bool, error) {
|
||||
|
@ -60,18 +87,14 @@ func (s shutdownHelperAPI) Start(ctx context.Context) (bool, error) {
|
|||
log.Info().Msgf("Using API to shutdown member")
|
||||
|
||||
group := s.action.Group
|
||||
m, ok := s.actionCtx.GetMemberStatusByID(s.action.MemberID)
|
||||
if !ok {
|
||||
log.Error().Msg("No such member")
|
||||
return true, nil
|
||||
}
|
||||
if m.PodName == "" {
|
||||
podName := s.memberStatus.PodName
|
||||
if podName == "" {
|
||||
log.Warn().Msgf("Pod is empty")
|
||||
return true, nil
|
||||
}
|
||||
// Remove finalizers, so Kubernetes will quickly terminate the pod
|
||||
if !features.GracefulShutdown().Enabled() {
|
||||
if err := s.actionCtx.RemovePodFinalizers(ctx, m.PodName); err != nil {
|
||||
if err := s.actionCtx.RemovePodFinalizers(ctx, podName); err != nil {
|
||||
return false, errors.WithStack(err)
|
||||
}
|
||||
}
|
||||
|
@ -100,7 +123,7 @@ func (s shutdownHelperAPI) Start(ctx context.Context) (bool, error) {
|
|||
}
|
||||
} else if group.IsArangosync() {
|
||||
// Terminate pod
|
||||
if err := s.actionCtx.DeletePod(ctx, m.PodName); err != nil {
|
||||
if err := s.actionCtx.DeletePod(ctx, podName, meta.DeleteOptions{}); err != nil {
|
||||
return false, errors.WithStack(err)
|
||||
}
|
||||
}
|
||||
|
@ -108,26 +131,17 @@ func (s shutdownHelperAPI) Start(ctx context.Context) (bool, error) {
|
|||
return false, nil
|
||||
}
|
||||
|
||||
func (s shutdownHelperAPI) CheckProgress(ctx context.Context) (bool, bool, error) {
|
||||
// Check that pod is removed
|
||||
log := s.log
|
||||
m, found := s.actionCtx.GetMemberStatusByID(s.action.MemberID)
|
||||
if !found {
|
||||
log.Error().Msg("No such member")
|
||||
return true, false, nil
|
||||
}
|
||||
if !m.Conditions.IsTrue(api.ConditionTypeTerminated) {
|
||||
// Pod is not yet terminated
|
||||
return false, false, nil
|
||||
}
|
||||
|
||||
return true, false, nil
|
||||
// CheckProgress returns true when pod is terminated.
|
||||
func (s shutdownHelperAPI) CheckProgress(_ context.Context) (bool, bool, error) {
|
||||
terminated := s.memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated)
|
||||
return terminated, false, nil
|
||||
}
|
||||
|
||||
type shutdownHelperDelete struct {
|
||||
log zerolog.Logger
|
||||
action *api.Action
|
||||
actionCtx ActionContext
|
||||
log zerolog.Logger
|
||||
action *api.Action
|
||||
actionCtx ActionContext
|
||||
memberStatus api.MemberStatus
|
||||
}
|
||||
|
||||
func (s shutdownHelperDelete) Start(ctx context.Context) (bool, error) {
|
||||
|
@ -135,23 +149,17 @@ func (s shutdownHelperDelete) Start(ctx context.Context) (bool, error) {
|
|||
|
||||
log.Info().Msgf("Using Pod Delete to shutdown member")
|
||||
|
||||
m, ok := s.actionCtx.GetMemberStatusByID(s.action.MemberID)
|
||||
if !ok {
|
||||
log.Error().Msg("No such member")
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if m.PodName == "" {
|
||||
podName := s.memberStatus.PodName
|
||||
if podName == "" {
|
||||
log.Warn().Msgf("Pod is empty")
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Terminate pod
|
||||
if err := s.actionCtx.DeletePod(ctx, m.PodName); err != nil {
|
||||
if err := s.actionCtx.DeletePod(ctx, podName, meta.DeleteOptions{}); err != nil {
|
||||
if !k8sutil.IsNotFound(err) {
|
||||
return false, errors.WithStack(err)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return false, nil
|
||||
|
@ -160,20 +168,15 @@ func (s shutdownHelperDelete) Start(ctx context.Context) (bool, error) {
|
|||
func (s shutdownHelperDelete) CheckProgress(ctx context.Context) (bool, bool, error) {
|
||||
// Check that pod is removed
|
||||
log := s.log
|
||||
m, found := s.actionCtx.GetMemberStatusByID(s.action.MemberID)
|
||||
if !found {
|
||||
log.Error().Msg("No such member")
|
||||
return true, false, nil
|
||||
}
|
||||
|
||||
if !m.Conditions.IsTrue(api.ConditionTypeTerminated) {
|
||||
if !s.memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) {
|
||||
// Pod is not yet terminated
|
||||
log.Warn().Msgf("Pod not yet terminated")
|
||||
return false, false, nil
|
||||
}
|
||||
|
||||
if m.PodName != "" {
|
||||
if _, err := s.actionCtx.GetPod(ctx, m.PodName); err == nil {
|
||||
podName := s.memberStatus.PodName
|
||||
if podName != "" {
|
||||
if _, err := s.actionCtx.GetPod(ctx, podName); err == nil {
|
||||
log.Warn().Msgf("Pod still exists")
|
||||
return false, false, nil
|
||||
} else if !k8sutil.IsNotFound(err) {
|
||||
|
@ -184,3 +187,58 @@ func (s shutdownHelperDelete) CheckProgress(ctx context.Context) (bool, bool, er
|
|||
|
||||
return true, false, nil
|
||||
}
|
||||
|
||||
type shutdownNow struct {
|
||||
action *api.Action
|
||||
actionCtx ActionContext
|
||||
memberStatus api.MemberStatus
|
||||
log zerolog.Logger
|
||||
}
|
||||
|
||||
// Start starts removing pod forcefully.
|
||||
func (s shutdownNow) Start(ctx context.Context) (bool, error) {
|
||||
// Check progress is used here because removing pod can start gracefully,
|
||||
// and then it can be changed to force shutdown.
|
||||
s.log.Info().Msg("Using shutdown now method")
|
||||
ready, _, err := s.CheckProgress(ctx)
|
||||
return ready, err
|
||||
}
|
||||
|
||||
// CheckProgress starts removing the pod forcefully and checks whether it has been removed.
|
||||
func (s shutdownNow) CheckProgress(ctx context.Context) (bool, bool, error) {
|
||||
podName := s.memberStatus.PodName
|
||||
pod, err := s.actionCtx.GetPod(ctx, podName)
|
||||
if err != nil {
|
||||
if k8sutil.IsNotFound(err) {
|
||||
s.log.Info().Msg("Using shutdown now method completed because pod is gone")
|
||||
return true, false, nil
|
||||
}
|
||||
|
||||
return false, false, errors.Wrapf(err, "failed to get pod")
|
||||
}
|
||||
|
||||
if s.memberStatus.PodUID != pod.GetUID() {
|
||||
s.log.Info().Msg("Using shutdown now method completed because it is already rotated")
|
||||
// The new pod has been started already.
|
||||
return true, false, nil
|
||||
}
|
||||
|
||||
// Remove finalizers forcefully.
|
||||
if err := s.actionCtx.RemovePodFinalizers(ctx, podName); err != nil {
|
||||
return false, false, errors.WithStack(err)
|
||||
}
|
||||
|
||||
// Terminate pod.
|
||||
options := meta.DeleteOptions{
|
||||
// Leave one second to clean a PVC.
|
||||
GracePeriodSeconds: util.NewInt64(1),
|
||||
}
|
||||
if err := s.actionCtx.DeletePod(ctx, podName, options); err != nil {
|
||||
if !k8sutil.IsNotFound(err) {
|
||||
return false, false, errors.WithStack(err)
|
||||
}
|
||||
}
|
||||
|
||||
s.log.Info().Msgf("Using shutdown now method completed")
|
||||
return true, false, nil
|
||||
}
|
||||
|
|
|
@ -305,7 +305,7 @@ func (c *testContext) CreateMember(_ context.Context, group api.ServerGroup, id
|
|||
panic("implement me")
|
||||
}
|
||||
|
||||
func (c *testContext) DeletePod(_ context.Context, podName string) error {
|
||||
func (c *testContext) DeletePod(_ context.Context, _ string, _ meta.DeleteOptions) error {
|
||||
panic("implement me")
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import (
|
|||
"context"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
)
|
||||
|
@ -53,7 +54,7 @@ func (r *Reconciler) CheckDeployment(ctx context.Context) error {
|
|||
if status.Members.Coordinators.AllFailed() {
|
||||
r.log.Error().Msg("All coordinators failed - reset")
|
||||
for _, m := range status.Members.Coordinators {
|
||||
if err := r.context.DeletePod(ctx, m.PodName); err != nil {
|
||||
if err := r.context.DeletePod(ctx, m.PodName, meta.DeleteOptions{}); err != nil {
|
||||
r.log.Error().Err(err).Msg("Failed to delete pod")
|
||||
}
|
||||
m.Phase = api.MemberPhaseNone
|
||||
|
|
|
@ -23,10 +23,18 @@ package resources
|
|||
import (
|
||||
"context"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/deployment/patch"
|
||||
"github.com/arangodb/go-driver"
|
||||
"github.com/arangodb/go-driver/agency"
|
||||
core "k8s.io/api/core/v1"
|
||||
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
agencyCache "github.com/arangodb/kube-arangodb/pkg/deployment/agency"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/deployment/patch"
|
||||
"github.com/arangodb/kube-arangodb/pkg/operator/scope"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/arangomember"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/persistentvolumeclaim"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/pod"
|
||||
|
@ -35,17 +43,6 @@ import (
|
|||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/service"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/serviceaccount"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/servicemonitor"
|
||||
|
||||
"github.com/arangodb/kube-arangodb/pkg/operator/scope"
|
||||
|
||||
backupApi "github.com/arangodb/kube-arangodb/pkg/apis/backup/v1"
|
||||
|
||||
driver "github.com/arangodb/go-driver"
|
||||
"github.com/arangodb/go-driver/agency"
|
||||
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
|
||||
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
|
||||
core "k8s.io/api/core/v1"
|
||||
)
|
||||
|
||||
// ServerGroupIterator provides a helper to callback on every server
|
||||
|
@ -196,7 +193,7 @@ type Context interface {
|
|||
CleanupPod(ctx context.Context, p *core.Pod) error
|
||||
// DeletePod deletes a pod with given name in the namespace
|
||||
// of the deployment. If the pod does not exist, the error is ignored.
|
||||
DeletePod(ctx context.Context, podName string) error
|
||||
DeletePod(ctx context.Context, podName string, options meta.DeleteOptions) error
|
||||
// DeletePvc deletes a persistent volume claim with given name in the namespace
|
||||
// of the deployment. If the pvc does not exist, the error is ignored.
|
||||
DeletePvc(ctx context.Context, pvcName string) error
|
||||
|
|
|
@ -151,7 +151,7 @@ func (r *Resources) cleanupRemovedClusterMembers(ctx context.Context) error {
|
|||
|
||||
for _, podName := range podNamesToRemove {
|
||||
log.Info().Str("pod", podName).Msg("Removing obsolete member pod")
|
||||
if err := r.context.DeletePod(ctx, podName); err != nil && !k8sutil.IsNotFound(err) {
|
||||
if err := r.context.DeletePod(ctx, podName, metav1.DeleteOptions{}); err != nil && !k8sutil.IsNotFound(err) {
|
||||
log.Warn().Err(err).Str("pod", podName).Msg("Failed to remove obsolete pod")
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue