1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Feature] Maintenance mode improvements (#757)

This commit is contained in:
Adam Janikowski 2021-07-15 14:07:33 +02:00 committed by GitHub
parent 78b989c0a4
commit 5a1500293f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
28 changed files with 467 additions and 120 deletions

4
go.mod
View file

@ -3,7 +3,7 @@ module github.com/arangodb/kube-arangodb
go 1.16
replace (
github.com/arangodb/go-driver => github.com/arangodb/go-driver v0.0.0-20210518064911-4985e8be3d90
github.com/arangodb/go-driver => github.com/arangodb/go-driver v0.0.0-20210621075908-e7a6fa0cbd18
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring => github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.46.0
github.com/prometheus-operator/prometheus-operator/pkg/client => github.com/prometheus-operator/prometheus-operator/pkg/client v0.46.0
github.com/stretchr/testify => github.com/stretchr/testify v1.5.1
@ -26,7 +26,7 @@ require (
github.com/aktau/github-release v0.10.0 // indirect
github.com/arangodb-helper/go-certificates v0.0.0-20180821055445-9fca24fc2680
github.com/arangodb/arangosync-client v0.6.3
github.com/arangodb/go-driver v0.0.0-20191002124627-11b6bfc64f67
github.com/arangodb/go-driver v0.0.0-20210621075908-e7a6fa0cbd18
github.com/arangodb/go-upgrade-rules v0.0.0-20180809110947-031b4774ff21
github.com/cenkalti/backoff v2.1.1+incompatible
github.com/coreos/go-semver v0.3.0

2
go.sum
View file

@ -56,6 +56,8 @@ github.com/arangodb/go-driver v0.0.0-20210517114204-8cc084268066 h1:NneOFWxfa7rh
github.com/arangodb/go-driver v0.0.0-20210517114204-8cc084268066/go.mod h1:3NUekcRLpgheFIGEwcOvxilEW73MV1queNKW58k7sdc=
github.com/arangodb/go-driver v0.0.0-20210518064911-4985e8be3d90 h1:NMnMsS32jOF+e0v+MLXlgRJM7ejSAXxHg1UDv1q417I=
github.com/arangodb/go-driver v0.0.0-20210518064911-4985e8be3d90/go.mod h1:3NUekcRLpgheFIGEwcOvxilEW73MV1queNKW58k7sdc=
github.com/arangodb/go-driver v0.0.0-20210621075908-e7a6fa0cbd18 h1:3J0tqp5eQ8ptGOeeu7vo92RKf24bOA7MFy0z3uPiTWg=
github.com/arangodb/go-driver v0.0.0-20210621075908-e7a6fa0cbd18/go.mod h1:3NUekcRLpgheFIGEwcOvxilEW73MV1queNKW58k7sdc=
github.com/arangodb/go-upgrade-rules v0.0.0-20180809110947-031b4774ff21 h1:+W7D5ttxi/Ygh/39vialtypE23p9KI7P0J2qtoqUV4w=
github.com/arangodb/go-upgrade-rules v0.0.0-20180809110947-031b4774ff21/go.mod h1:RkPIG6JJ2pcJUoymc18NxAJGraZd+iAEVnOTDjZey/w=
github.com/arangodb/go-velocypack v0.0.0-20200318135517-5af53c29c67e h1:Xg+hGrY2LcQBbxd0ZFdbGSyRKTYMZCfBbw/pMJFOk1g=

View file

@ -63,8 +63,10 @@ const (
ConditionTypeUpToDate ConditionType = "UpToDate"
// ConditionTypeMarkedToRemove indicates that the member is marked to be removed.
ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove"
// ConditionTypeUpgradeFailed indicates that mem
// ConditionTypeUpgradeFailed indicates that upgrade failed
ConditionTypeUpgradeFailed ConditionType = "UpgradeFailed"
// ConditionTypeMaintenanceMode indicates that Maintenance is enabled
ConditionTypeMaintenanceMode ConditionType = "MaintenanceMode"
)
// Condition represents one current condition of a deployment or deployment member.
@ -146,6 +148,19 @@ func (list ConditionList) Get(conditionType ConditionType) (Condition, bool) {
return Condition{}, false
}
// Touch sets LastUpdateTime of the first condition with the given type to the
// current time. It returns true when such a condition was found and updated,
// false when the list holds no condition of that type.
func (list *ConditionList) Touch(conditionType ConditionType) bool {
	conditions := *list
	for idx := range conditions {
		if conditions[idx].Type != conditionType {
			continue
		}
		// Write through the index so the element in the list is modified.
		conditions[idx].LastUpdateTime = metav1.Now()
		return true
	}
	return false
}
// Update the condition, replacing an old condition with same type (if any)
// Returns true when changes were made, false otherwise.
func (list *ConditionList) Update(conditionType ConditionType, status bool, reason, message string) bool {

View file

@ -129,6 +129,8 @@ const (
ActionTypeEnableMaintenance ActionType = "EnableMaintenance"
// ActionTypeDisableMaintenance disables maintenance on cluster.
ActionTypeDisableMaintenance ActionType = "DisableMaintenance"
// ActionTypeSetMaintenanceCondition sets maintenance condition.
ActionTypeSetMaintenanceCondition ActionType = "SetMaintenanceCondition"
// ActionTypeBootstrapUpdate update bootstrap status to true
ActionTypeBootstrapUpdate ActionType = "BootstrapUpdate"
// ActionTypeBootstrapSetPassword set password to the bootstrapped user
@ -220,6 +222,11 @@ func (a Action) SetImage(image string) Action {
return a
}
// AsPlan converts a slice of actions into a Plan. The returned Plan refers to
// the same elements as the input slice; no copy is made.
func AsPlan(a []Action) Plan {
	return Plan(a)
}
// Plan is a list of actions that will be taken to update a deployment.
// Only 1 action is in progress at a time. The operator will wait for that
// action to be completed and then remove the action.
@ -245,3 +252,38 @@ func (p Plan) Equal(other Plan) bool {
func (p Plan) IsEmpty() bool {
return len(p) == 0
}
// After returns a new plan consisting of the receiver's actions followed by
// the given actions. The receiver is not modified.
func (p Plan) After(action ...Action) Plan {
	return append(append(Plan{}, p...), action...)
}
// Before returns a new plan consisting of the given actions followed by the
// receiver's actions. The receiver is not modified.
func (p Plan) Before(action ...Action) Plan {
	return append(append(Plan{}, action...), p...)
}
// Wrap returns a new plan that starts with the before action, continues with
// the receiver's actions and ends with the after action. The receiver is not
// modified.
func (p Plan) Wrap(before, after Action) Plan {
	wrapped := append(Plan{before}, p...)
	return append(wrapped, after)
}

View file

@ -28,8 +28,22 @@ import (
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
)
const (
// DefaultMaintenanceGracePeriod is the value GetMaintenanceGracePeriod
// returns for a nil Timeouts and passes as the fallback otherwise.
DefaultMaintenanceGracePeriod = 30 * time.Minute
)
// Timeouts groups optional timeout settings. Both fields are pointers and are
// omitted from the JSON output when nil.
type Timeouts struct {
// AddMember is serialized under the "addMember" key.
AddMember *Timeout `json:"addMember,omitempty"`
// MaintenanceGracePeriod is serialized under the "maintenanceGracePeriod"
// key; it is read through GetMaintenanceGracePeriod.
MaintenanceGracePeriod *Timeout `json:"maintenanceGracePeriod,omitempty"`
}
// GetMaintenanceGracePeriod returns the maintenance grace period. A nil
// receiver yields DefaultMaintenanceGracePeriod; otherwise the value is read
// from the MaintenanceGracePeriod field with DefaultMaintenanceGracePeriod
// supplied as the fallback.
func (t *Timeouts) GetMaintenanceGracePeriod() time.Duration {
	if t != nil {
		return t.MaintenanceGracePeriod.Get(DefaultMaintenanceGracePeriod)
	}
	return DefaultMaintenanceGracePeriod
}
func (t *Timeouts) Get() Timeouts {

View file

@ -2029,6 +2029,11 @@ func (in *Timeouts) DeepCopyInto(out *Timeouts) {
*out = new(Timeout)
**out = **in
}
if in.MaintenanceGracePeriod != nil {
in, out := &in.MaintenanceGracePeriod, &out.MaintenanceGracePeriod
*out = new(Timeout)
**out = **in
}
return
}

View file

@ -63,8 +63,10 @@ const (
ConditionTypeUpToDate ConditionType = "UpToDate"
// ConditionTypeMarkedToRemove indicates that the member is marked to be removed.
ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove"
// ConditionTypeUpgradeFailed indicates that mem
// ConditionTypeUpgradeFailed indicates that upgrade failed
ConditionTypeUpgradeFailed ConditionType = "UpgradeFailed"
// ConditionTypeMaintenanceMode indicates that Maintenance is enabled
ConditionTypeMaintenanceMode ConditionType = "MaintenanceMode"
)
// Condition represents one current condition of a deployment or deployment member.
@ -146,6 +148,19 @@ func (list ConditionList) Get(conditionType ConditionType) (Condition, bool) {
return Condition{}, false
}
// Touch sets LastUpdateTime of the first condition with the given type to the
// current time. It returns true when such a condition was found and updated,
// false when the list holds no condition of that type.
func (list *ConditionList) Touch(conditionType ConditionType) bool {
	conditions := *list
	for idx := range conditions {
		if conditions[idx].Type != conditionType {
			continue
		}
		// Write through the index so the element in the list is modified.
		conditions[idx].LastUpdateTime = metav1.Now()
		return true
	}
	return false
}
// Update the condition, replacing an old condition with same type (if any)
// Returns true when changes were made, false otherwise.
func (list *ConditionList) Update(conditionType ConditionType, status bool, reason, message string) bool {

View file

@ -129,6 +129,8 @@ const (
ActionTypeEnableMaintenance ActionType = "EnableMaintenance"
// ActionTypeDisableMaintenance disables maintenance on cluster.
ActionTypeDisableMaintenance ActionType = "DisableMaintenance"
// ActionTypeSetMaintenanceCondition sets maintenance condition.
ActionTypeSetMaintenanceCondition ActionType = "SetMaintenanceCondition"
// ActionTypeBootstrapUpdate update bootstrap status to true
ActionTypeBootstrapUpdate ActionType = "BootstrapUpdate"
// ActionTypeBootstrapSetPassword set password to the bootstrapped user
@ -220,6 +222,11 @@ func (a Action) SetImage(image string) Action {
return a
}
// AsPlan converts a slice of actions into a Plan. The returned Plan refers to
// the same elements as the input slice; no copy is made.
func AsPlan(a []Action) Plan {
	return Plan(a)
}
// Plan is a list of actions that will be taken to update a deployment.
// Only 1 action is in progress at a time. The operator will wait for that
// action to be completed and then remove the action.
@ -245,3 +252,38 @@ func (p Plan) Equal(other Plan) bool {
func (p Plan) IsEmpty() bool {
return len(p) == 0
}
// After returns a new plan consisting of the receiver's actions followed by
// the given actions. The receiver is not modified.
func (p Plan) After(action ...Action) Plan {
	return append(append(Plan{}, p...), action...)
}
// Before returns a new plan consisting of the given actions followed by the
// receiver's actions. The receiver is not modified.
func (p Plan) Before(action ...Action) Plan {
	return append(append(Plan{}, action...), p...)
}
// Wrap returns a new plan that starts with the before action, continues with
// the receiver's actions and ends with the after action. The receiver is not
// modified.
func (p Plan) Wrap(before, after Action) Plan {
	wrapped := append(Plan{before}, p...)
	return append(wrapped, after)
}

View file

@ -28,8 +28,22 @@ import (
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
)
const (
// DefaultMaintenanceGracePeriod is the value GetMaintenanceGracePeriod
// returns for a nil Timeouts and passes as the fallback otherwise.
DefaultMaintenanceGracePeriod = 30 * time.Minute
)
// Timeouts groups optional timeout settings. Both fields are pointers and are
// omitted from the JSON output when nil.
type Timeouts struct {
// AddMember is serialized under the "addMember" key.
AddMember *Timeout `json:"addMember,omitempty"`
// MaintenanceGracePeriod is serialized under the "maintenanceGracePeriod"
// key; it is read through GetMaintenanceGracePeriod.
MaintenanceGracePeriod *Timeout `json:"maintenanceGracePeriod,omitempty"`
}
// GetMaintenanceGracePeriod returns the maintenance grace period. A nil
// receiver yields DefaultMaintenanceGracePeriod; otherwise the value is read
// from the MaintenanceGracePeriod field with DefaultMaintenanceGracePeriod
// supplied as the fallback.
func (t *Timeouts) GetMaintenanceGracePeriod() time.Duration {
	if t != nil {
		return t.MaintenanceGracePeriod.Get(DefaultMaintenanceGracePeriod)
	}
	return DefaultMaintenanceGracePeriod
}
func (t *Timeouts) Get() Timeouts {

View file

@ -2029,6 +2029,11 @@ func (in *Timeouts) DeepCopyInto(out *Timeouts) {
*out = new(Timeout)
**out = **in
}
if in.MaintenanceGracePeriod != nil {
in, out := &in.MaintenanceGracePeriod, &out.MaintenanceGracePeriod
*out = new(Timeout)
**out = **in
}
return
}

View file

@ -26,40 +26,25 @@ import (
"context"
"net/http"
"github.com/arangodb/go-driver/agency"
"github.com/arangodb/go-driver"
)
type Maintenance struct {
Result string `json:"result"`
}
func GetMaintenanceMode(ctx context.Context, client agency.Agency) (bool, error) {
var data interface{}
err := client.ReadKey(ctx, []string{"arango", "Supervision", "Maintenance"}, &data)
func (m Maintenance) Enabled() bool {
return m.Result == "Maintenance"
}
func GetMaintenanceMode(ctx context.Context, client driver.Client) (Maintenance, error) {
conn := client.Connection()
r, err := conn.NewRequest(http.MethodGet, "/_admin/cluster/maintenance")
if err != nil {
return Maintenance{}, err
if err == nil {
// We got 200
return true, nil
}
resp, err := conn.Do(ctx, r)
if err != nil {
return Maintenance{}, err
if agency.IsKeyNotFound(err) {
return false, nil
}
if err := resp.CheckStatus(http.StatusOK); err != nil {
return Maintenance{}, err
}
var m Maintenance
if err := resp.ParseBody("", &m); err != nil {
return Maintenance{}, err
}
return m, nil
return false, err
}
func SetMaintenanceMode(ctx context.Context, client driver.Client, enabled bool) error {

View file

@ -593,7 +593,7 @@ func (d *Deployment) GetArangoImage() string {
return d.config.ArangoImage
}
func (d *Deployment) WithStatusUpdate(ctx context.Context, action func(s *api.DeploymentStatus) bool, force ...bool) error {
func (d *Deployment) WithStatusUpdate(ctx context.Context, action resources.DeploymentStatusUpdateFunc, force ...bool) error {
d.status.mutex.Lock()
defer d.status.mutex.Unlock()

View file

@ -29,6 +29,8 @@ import (
"sync/atomic"
"time"
"github.com/arangodb/kube-arangodb/pkg/deployment/agency"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
deploymentClient "github.com/arangodb/kube-arangodb/pkg/deployment/client"
@ -138,6 +140,41 @@ type Deployment struct {
haveServiceMonitorCRD bool
}
// GetAgencyMaintenanceMode reports whether maintenance mode is enabled in the
// agency. Deployments whose mode has no agents return false without any
// request. The agency lookup and the query share one context bounded by
// arangod.GetRequestTimeout().
func (d *Deployment) GetAgencyMaintenanceMode(ctx context.Context) (bool, error) {
	if !d.Mode().HasAgents() {
		return false, nil
	}

	ctxChild, cancel := context.WithTimeout(ctx, arangod.GetRequestTimeout())
	defer cancel()

	agencyClient, err := d.GetAgency(ctxChild)
	if err != nil {
		return false, err
	}

	enabled, err := agency.GetMaintenanceMode(ctxChild, agencyClient)
	if err != nil {
		return false, err
	}
	return enabled, nil
}
// SetAgencyMaintenanceMode switches maintenance mode on or off through the
// database client. Deployments whose mode has no agents are left untouched and
// nil is returned. Obtaining the client and issuing the change share one
// context bounded by arangod.GetRequestTimeout().
func (d *Deployment) SetAgencyMaintenanceMode(ctx context.Context, enabled bool) error {
	if !d.Mode().HasAgents() {
		return nil
	}

	ctxChild, cancel := context.WithTimeout(ctx, arangod.GetRequestTimeout())
	defer cancel()

	dbClient, err := d.GetDatabaseClient(ctxChild)
	if err != nil {
		return err
	}
	return agency.SetMaintenanceMode(ctxChild, dbClient, enabled)
}
// New creates a new Deployment from the given API object.
func New(config Config, deps Dependencies, apiObject *api.ArangoDeployment) (*Deployment, error) {
if err := apiObject.Spec.Validate(); err != nil {

View file

@ -27,6 +27,8 @@ import (
"context"
"time"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
@ -270,6 +272,9 @@ func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterva
nextInterval = interval
}
// Refresh maintenance lock
d.refreshMaintenanceTTL(ctx)
// Create scale/update plan
if _, ok := d.apiObject.Annotations[deployment.ArangoDeploymentPlanCleanAnnotation]; ok {
if err := d.ApplyPatch(ctx, patch.ItemRemove(patch.NewPath("metadata", "annotations", deployment.ArangoDeploymentPlanCleanAnnotation))); err != nil {
@ -344,6 +349,36 @@ func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterva
return
}
// refreshMaintenanceTTL refreshes the maintenance lock: once the
// MaintenanceMode condition's LastUpdateTime is older than the configured
// maintenance grace period, it enables agency maintenance mode again and
// touches the condition.
//
// Nothing is done for single-mode deployments, when the maintenance feature is
// disabled, or when the MaintenanceMode condition is missing or not true.
// The function has no error return, so failures are logged and the refresh is
// abandoned for this call.
func (d *Deployment) refreshMaintenanceTTL(ctx context.Context) {
	if d.apiObject.Spec.Mode.Get() == api.DeploymentModeSingle {
		return
	}

	if !features.Maintenance().Enabled() {
		// Maintenance feature is not enabled
		return
	}

	condition, ok := d.status.last.Conditions.Get(api.ConditionTypeMaintenanceMode)
	if !ok || !condition.IsTrue() {
		return
	}

	// Check GracePeriod: only refresh when the last update is older than it.
	gracePeriod := d.apiObject.Spec.Timeouts.GetMaintenanceGracePeriod()
	if !condition.LastUpdateTime.Add(gracePeriod).Before(time.Now()) {
		return
	}

	if err := d.SetAgencyMaintenanceMode(ctx, true); err != nil {
		d.deps.Log.Warn().Err(err).Msg("Unable to refresh maintenance lock")
		return
	}

	// Touch the condition only after the agency accepted the change, so a
	// failed refresh is retried on a later call.
	if err := d.WithStatusUpdate(ctx, func(s *api.DeploymentStatus) bool {
		return s.Conditions.Touch(api.ConditionTypeMaintenanceMode)
	}); err != nil {
		d.deps.Log.Warn().Err(err).Msg("Unable to update maintenance condition after refreshing maintenance lock")
		return
	}

	d.deps.Log.Info().Msgf("Refreshed maintenance lock")
}
func (d *Deployment) ensureResources(ctx context.Context, lastInterval util.Interval, cachedStatus inspectorInterface.Inspector) (util.Interval, error) {
// Ensure all resources are created
if d.haveServiceMonitorCRD {

View file

@ -26,6 +26,8 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
@ -47,6 +49,9 @@ import (
// ActionContext provides methods to the Action implementations
// to control their context.
type ActionContext interface {
resources.DeploymentStatusUpdate
resources.DeploymentAgencyMaintenance
// GetAPIObject returns the deployment as k8s object.
GetAPIObject() k8sutil.APIObject
// Gets the specified mode of deployment
@ -125,8 +130,6 @@ type ActionContext interface {
// WithStatusUpdate update status of ArangoDeployment with defined modifier. If action returns True action is taken
UpdateClusterCondition(ctx context.Context, conditionType api.ConditionType, status bool, reason, message string) error
SecretsInterface() k8sutil.SecretInterface
// WithStatusUpdate update status of ArangoDeployment with defined modifier. If action returns True action is taken
WithStatusUpdate(ctx context.Context, action func(s *api.DeploymentStatus) bool, force ...bool) error
// GetBackup receives information about a backup resource
GetBackup(ctx context.Context, backup string) (*backupApi.ArangoBackup, error)
// GetName receives information about a deployment name
@ -151,6 +154,14 @@ type actionContext struct {
cachedStatus inspectorInterface.Inspector
}
// GetAgencyMaintenanceMode forwards the call to the wrapped context and
// returns its result unchanged.
func (ac *actionContext) GetAgencyMaintenanceMode(ctx context.Context) (bool, error) {
return ac.context.GetAgencyMaintenanceMode(ctx)
}
// SetAgencyMaintenanceMode forwards the call to the wrapped context and
// returns its error unchanged.
func (ac *actionContext) SetAgencyMaintenanceMode(ctx context.Context, enabled bool) error {
return ac.context.SetAgencyMaintenanceMode(ctx, enabled)
}
// GetCachedStatus returns the inspector stored in the cachedStatus field.
func (ac *actionContext) GetCachedStatus() inspectorInterface.Inspector {
return ac.cachedStatus
}
@ -171,7 +182,7 @@ func (ac *actionContext) GetBackup(ctx context.Context, backup string) (*backupA
return ac.context.GetBackup(ctx, backup)
}
func (ac *actionContext) WithStatusUpdate(ctx context.Context, action func(s *api.DeploymentStatus) bool, force ...bool) error {
func (ac *actionContext) WithStatusUpdate(ctx context.Context, action resources.DeploymentStatusUpdateFunc, force ...bool) error {
return ac.context.WithStatusUpdate(ctx, action, force...)
}

View file

@ -0,0 +1,77 @@
//
// DISCLAIMER
//
// Copyright 2020-2021 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Adam Janikowski
//
package reconcile
import (
"context"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/rs/zerolog"
)
// init passes newSetMaintenanceConditionAction to registerAction under the
// SetMaintenanceCondition action type.
func init() {
registerAction(api.ActionTypeSetMaintenanceCondition, newSetMaintenanceConditionAction)
}
// newSetMaintenanceConditionAction builds the action that sets the maintenance
// condition. The embedded actionImpl is created with addMemberTimeout and a
// pointer to the action's own newMemberID field.
func newSetMaintenanceConditionAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action {
	impl := new(actionSetMaintenanceCondition)

	impl.actionImpl = newActionImpl(log, action, actionCtx, addMemberTimeout, &impl.newMemberID)

	return impl
}
// actionSetMaintenanceCondition is the action registered for
// ActionTypeSetMaintenanceCondition; its Start method mirrors the agency
// maintenance mode into the MaintenanceMode condition.
type actionSetMaintenanceCondition struct {
// actionImpl implement timeout and member id functions
actionImpl
// actionEmptyCheckProgress is embedded as well.
// NOTE(review): presumably supplies a no-op CheckProgress — confirm.
actionEmptyCheckProgress
// newMemberID is handed to newActionImpl by pointer in the constructor.
newMemberID string
}
// Start reads the agency maintenance mode and mirrors it into the deployment
// status: the MaintenanceMode condition is set to true when maintenance is
// enabled and removed otherwise. Single-mode deployments are skipped.
//
// The action always reports itself as finished (true, nil); failures to read
// the mode or to update the status are only logged.
func (a *actionSetMaintenanceCondition) Start(ctx context.Context) (bool, error) {
	if a.actionCtx.GetMode() == api.DeploymentModeSingle {
		return true, nil
	}

	maintenance, err := a.actionCtx.GetAgencyMaintenanceMode(ctx)
	if err != nil {
		a.log.Error().Err(err).Msgf("Unable to set maintenance condition")
		return true, nil
	}

	syncCondition := func(s *api.DeploymentStatus) bool {
		if !maintenance {
			return s.Conditions.Remove(api.ConditionTypeMaintenanceMode)
		}
		return s.Conditions.Update(api.ConditionTypeMaintenanceMode, true, "Maintenance", "Maintenance enabled")
	}

	if err := a.actionCtx.WithStatusUpdate(ctx, syncCondition); err != nil {
		a.log.Error().Err(err).Msgf("Unable to set maintenance condition")
	}

	return true, nil
}

View file

@ -27,8 +27,6 @@ import (
"context"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/deployment/agency"
"github.com/arangodb/kube-arangodb/pkg/util/arangod"
"github.com/rs/zerolog"
)
@ -59,18 +57,7 @@ func (a *actionDisableMaintenance) Start(ctx context.Context) (bool, error) {
return true, nil
}
ctxChild, cancel := context.WithTimeout(ctx, arangod.GetRequestTimeout())
defer cancel()
client, err := a.actionCtx.GetDatabaseClient(ctxChild)
if err != nil {
a.log.Error().Err(err).Msgf("Unable to get agency client")
return true, nil
}
err = arangod.RunWithTimeout(ctx, func(ctxChild context.Context) error {
return agency.SetMaintenanceMode(ctxChild, client, false)
})
if err != nil {
if err := a.actionCtx.SetAgencyMaintenanceMode(ctx, false); err != nil {
a.log.Error().Err(err).Msgf("Unable to disable maintenance")
return true, nil
}

View file

@ -26,10 +26,7 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/util/arangod"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/deployment/agency"
"github.com/rs/zerolog"
)
@ -60,19 +57,8 @@ func (a *actionEnableMaintenance) Start(ctx context.Context) (bool, error) {
return true, nil
}
ctxChild, cancel := context.WithTimeout(ctx, arangod.GetRequestTimeout())
defer cancel()
client, err := a.actionCtx.GetDatabaseClient(ctxChild)
if err != nil {
a.log.Error().Err(err).Msgf("Unable to get agency client")
return true, nil
}
err = arangod.RunWithTimeout(ctx, func(ctxChild context.Context) error {
return agency.SetMaintenanceMode(ctxChild, client, true)
})
if err != nil {
a.log.Error().Err(err).Msgf("Unable to set maintenance")
if err := a.actionCtx.SetAgencyMaintenanceMode(ctx, true); err != nil {
a.log.Error().Err(err).Msgf("Unable to enable maintenance")
return true, nil
}

View file

@ -28,7 +28,6 @@ import (
"github.com/arangodb/go-driver"
"github.com/arangodb/kube-arangodb/pkg/util/arangod"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
@ -79,6 +78,15 @@ func (a *actionResignLeadership) Start(ctx context.Context) (bool, error) {
switch group {
case api.ServerGroupDBServers:
if enabled, err := a.actionCtx.GetAgencyMaintenanceMode(ctx); err != nil {
log.Warn().Err(err).Msgf("Maintenance is enabled, skipping action")
return true, errors.WithStack(err)
} else if enabled {
// We are done, action cannot be handled on maintenance mode
log.Warn().Msgf("Maintenance is enabled, skipping action")
return true, nil
}
ctxChild, cancel = context.WithTimeout(ctx, arangod.GetRequestTimeout())
defer cancel()
cluster, err := client.Cluster(ctxChild)
@ -119,6 +127,19 @@ func (a *actionResignLeadership) CheckProgress(ctx context.Context) (bool, bool,
return true, false, nil
}
if enabled, err := a.actionCtx.GetAgencyMaintenanceMode(ctx); err != nil {
log.Error().Err(err).Msgf("Unable to get maintenance mode")
return false, false, errors.WithStack(err)
} else if enabled {
log.Warn().Msgf("Maintenance is enabled, skipping action")
// We are done, action cannot be handled on maintenance mode
m.CleanoutJobID = ""
if err := a.actionCtx.UpdateMember(ctx, m); err != nil {
return false, false, errors.WithStack(err)
}
return true, false, nil
}
ctxChild, cancel := context.WithTimeout(ctx, arangod.GetRequestTimeout())
defer cancel()
agency, err := a.actionCtx.GetAgency(ctxChild)

View file

@ -26,6 +26,8 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
"github.com/arangodb/arangosync-client/client"
driver "github.com/arangodb/go-driver"
"github.com/arangodb/go-driver/agency"
@ -40,6 +42,9 @@ import (
// Context provides methods to the reconcile package.
type Context interface {
resources.DeploymentStatusUpdate
resources.DeploymentAgencyMaintenance
// GetAPIObject returns the deployment as k8s object.
GetAPIObject() k8sutil.APIObject
// GetSpec returns the current specification of the deployment
@ -111,8 +116,6 @@ type Context interface {
RenderPodForMember(ctx context.Context, cachedStatus inspectorInterface.Inspector, spec api.DeploymentSpec, status api.DeploymentStatus, memberID string, imageInfo api.ImageInfo) (*v1.Pod, error)
// SelectImage select currently used image by pod
SelectImage(spec api.DeploymentSpec, status api.DeploymentStatus) (api.ImageInfo, bool)
// WithStatusUpdate update status of ArangoDeployment with defined modifier. If action returns True action is taken
WithStatusUpdate(ctx context.Context, action func(s *api.DeploymentStatus) bool, force ...bool) error
// SecretsInterface return secret interface
SecretsInterface() k8sutil.SecretInterface
// GetBackup receives information about a backup resource

View file

@ -32,13 +32,22 @@ func withMaintenance(plan ...api.Action) api.Plan {
return plan
}
var newPlan api.Plan
newPlan = append(newPlan, api.NewAction(api.ActionTypeEnableMaintenance, api.ServerGroupUnknown, "", "Enable maintenance before actions"))
newPlan = append(newPlan, plan...)
newPlan = append(newPlan, api.NewAction(api.ActionTypeDisableMaintenance, api.ServerGroupUnknown, "", "Disable maintenance after actions"))
return newPlan
return withMaintenanceStart(plan...).After(api.NewAction(api.ActionTypeDisableMaintenance, api.ServerGroupUnknown, "", "Disable maintenance after actions"))
}
// withMaintenanceStart prefixes the plan with an EnableMaintenance action
// followed by a SetMaintenanceCondition action. When the maintenance feature
// is disabled the plan is returned unchanged.
func withMaintenanceStart(plan ...api.Action) api.Plan {
	if features.Maintenance().Enabled() {
		enable := api.NewAction(api.ActionTypeEnableMaintenance, api.ServerGroupUnknown, "", "Enable maintenance before actions")
		setCondition := api.NewAction(api.ActionTypeSetMaintenanceCondition, api.ServerGroupUnknown, "", "Enable maintenance before actions")
		return api.AsPlan(plan).Before(enable, setCondition)
	}

	return plan
}
// withResignLeadership prefixes the plan with a ResignLeadership action for
// the given member and group, using the supplied reason. Members without image
// information get the plan back unchanged.
func withResignLeadership(group api.ServerGroup, member api.MemberStatus, reason string, plan ...api.Action) api.Plan {
	if member.Image != nil {
		resign := api.NewAction(api.ActionTypeResignLeadership, group, member.ID, reason)
		return api.AsPlan(plan).Before(resign)
	}

	return plan
}

View file

@ -215,10 +215,6 @@ func createPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIOb
plan = pb.Apply(createJWTStatusUpdate)
}
if plan.IsEmpty() {
plan = pb.Apply(createMaintenanceManagementPlan)
}
// Check for scale up/down
if plan.IsEmpty() {
plan = pb.Apply(createScaleMemberPlan)
@ -234,6 +230,11 @@ func createPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIOb
plan = pb.Apply(createRotateOrUpgradePlan)
}
// Disable maintenance if upgrade process was done. Upgrade task throw IDLE Action if upgrade is pending
if plan.IsEmpty() {
plan = pb.Apply(createMaintenanceManagementPlan)
}
// Add keys
if plan.IsEmpty() {
plan = pb.ApplySubPlan(createEncryptionKeyStatusPropagatedFieldUpdate, createEncryptionKey)

View file

@ -26,10 +26,7 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/util/arangod"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/deployment/agency"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
@ -49,30 +46,26 @@ func createMaintenanceManagementPlan(ctx context.Context,
return nil
}
ctxChild, cancel := context.WithTimeout(ctx, arangod.GetRequestTimeout())
defer cancel()
client, err := planCtx.GetDatabaseClient(ctxChild)
enabled, err := planCtx.GetAgencyMaintenanceMode(ctx)
if err != nil {
log.Error().Err(err).Msgf("Unable to get agency client")
log.Error().Err(err).Msgf("Unable to get agency mode")
return nil
}
ctxChild, cancel = context.WithTimeout(ctx, arangod.GetRequestTimeout())
defer cancel()
m, err := agency.GetMaintenanceMode(ctxChild, client)
if err != nil {
log.Error().Err(err).Msgf("Unable to get agency maintenance mode")
return nil
}
if !m.Enabled() && spec.Database.GetMaintenance() {
if !enabled && spec.Database.GetMaintenance() {
log.Info().Msgf("Enabling maintenance mode")
return api.Plan{api.NewAction(api.ActionTypeEnableMaintenance, api.ServerGroupUnknown, "")}
return api.Plan{api.NewAction(api.ActionTypeEnableMaintenance, api.ServerGroupUnknown, ""), api.NewAction(api.ActionTypeSetMaintenanceCondition, api.ServerGroupUnknown, "")}
}
if m.Enabled() && !spec.Database.GetMaintenance() {
if enabled && !spec.Database.GetMaintenance() {
log.Info().Msgf("Disabling maintenance mode")
return api.Plan{api.NewAction(api.ActionTypeDisableMaintenance, api.ServerGroupUnknown, "")}
return api.Plan{api.NewAction(api.ActionTypeDisableMaintenance, api.ServerGroupUnknown, ""), api.NewAction(api.ActionTypeSetMaintenanceCondition, api.ServerGroupUnknown, "")}
}
condition, ok := status.Conditions.Get(api.ConditionTypeMaintenanceMode)
if enabled != (ok && condition.IsTrue()) {
return api.Plan{api.NewAction(api.ActionTypeSetMaintenanceCondition, api.ServerGroupUnknown, "")}
}
return nil

View file

@ -25,6 +25,8 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
"github.com/arangodb/kube-arangodb/pkg/util/arangod/conn"
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
@ -41,6 +43,9 @@ import (
// PlanBuilderContext contains context methods provided to plan builders.
type PlanBuilderContext interface {
resources.DeploymentStatusUpdate
resources.DeploymentAgencyMaintenance
// GetTLSKeyfile returns the keyfile encoded TLS certificate+key for
// the given member.
GetTLSKeyfile(group api.ServerGroup, member api.MemberStatus) (string, error)

View file

@ -74,18 +74,18 @@ func createRestorePlan(ctx context.Context,
}
}
return restorePlan(spec.Mode.Get())
return restorePlan(spec)
}
return nil
}
func restorePlan(mode api.DeploymentMode) api.Plan {
func restorePlan(spec api.DeploymentSpec) api.Plan {
p := api.Plan{
api.NewAction(api.ActionTypeBackupRestore, api.ServerGroupUnknown, ""),
}
switch mode {
switch spec.Mode.Get() {
case api.DeploymentModeActiveFailover:
p = withMaintenance(p...)
}

View file

@ -25,6 +25,8 @@ package reconcile
import (
"context"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
json "github.com/json-iterator/go"
"github.com/arangodb/kube-arangodb/pkg/deployment/pod"
@ -183,6 +185,7 @@ func createRotateOrUpgradePlanInternal(ctx context.Context, log zerolog.Logger,
}
}
}
return nil, false
}
@ -378,19 +381,34 @@ func createUpgradeMemberPlan(log zerolog.Logger, member api.MemberStatus,
api.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, group, member.ID, "Remove server keyfile and enforce renewal/recreation"),
}
if status.CurrentImage == nil || status.CurrentImage.Image != spec.GetImage() {
plan = append(plan,
api.NewAction(api.ActionTypeSetCurrentImage, group, "", reason).SetImage(spec.GetImage()),
)
plan = plan.After(api.NewAction(api.ActionTypeSetCurrentImage, group, "", reason).SetImage(spec.GetImage()))
}
if member.Image == nil || member.Image.Image != spec.GetImage() {
plan = append(plan,
api.NewAction(api.ActionTypeSetMemberCurrentImage, group, member.ID, reason).SetImage(spec.GetImage()),
)
plan = plan.After(api.NewAction(api.ActionTypeSetMemberCurrentImage, group, member.ID, reason).SetImage(spec.GetImage()))
}
plan = append(plan,
api.NewAction(api.ActionTypeResignLeadership, group, member.ID, reason),
api.NewAction(upgradeAction, group, member.ID, reason),
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID),
)
return withMaintenance(plan...)
plan = plan.After(api.NewAction(upgradeAction, group, member.ID, reason),
api.NewAction(api.ActionTypeWaitForMemberUp, group, member.ID))
return withSecureWrap(member, group, spec, plan...)
}
func withSecureWrap(member api.MemberStatus,
group api.ServerGroup, spec api.DeploymentSpec, plan ...api.Action) api.Plan {
image := member.Image
if image == nil {
return plan
}
if skipResignLeadership(spec.GetMode(), image.ArangoDBVersion) {
// In this case we skip resign leadership but we enable maintenance
return withMaintenanceStart(plan...)
} else {
return withResignLeadership(group, member, "ResignLeadership", plan...)
}
}
// skipResignLeadership reports whether leadership resignation is skipped. It
// is true only for cluster mode with the maintenance feature enabled and a
// version inside one of the inclusive ranges 3.6.0-3.6.14 or 3.7.0-3.7.12.
func skipResignLeadership(mode api.DeploymentMode, v driver.Version) bool {
	if mode != api.DeploymentModeCluster {
		return false
	}
	if !features.Maintenance().Enabled() {
		return false
	}

	if v.CompareTo("3.6.0") >= 0 && v.CompareTo("3.6.14") <= 0 {
		return true
	}
	return v.CompareTo("3.7.0") >= 0 && v.CompareTo("3.7.12") <= 0
}

View file

@ -29,6 +29,8 @@ import (
"io/ioutil"
"testing"
"github.com/arangodb/kube-arangodb/pkg/deployment/resources"
apiErrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime/schema"
@ -73,6 +75,18 @@ type testContext struct {
RecordedEvent *k8sutil.Event
}
// GetAgencyMaintenanceMode is not implemented by testContext; calling it panics.
func (c *testContext) GetAgencyMaintenanceMode(ctx context.Context) (bool, error) {
panic("implement me")
}
// SetAgencyMaintenanceMode is not implemented by testContext; calling it panics.
func (c *testContext) SetAgencyMaintenanceMode(ctx context.Context, enabled bool) error {
panic("implement me")
}
// WithStatusUpdate is not implemented by testContext; calling it panics.
func (c *testContext) WithStatusUpdate(ctx context.Context, action resources.DeploymentStatusUpdateFunc, force ...bool) error {
panic("implement me")
}
func (c *testContext) GetPod(_ context.Context, podName string) (*core.Pod, error) {
if c.ErrPods != nil {
return nil, c.ErrPods
@ -109,10 +123,6 @@ func (c *testContext) SecretsInterface() k8sutil.SecretInterface {
panic("implement me")
}
func (c *testContext) WithStatusUpdate(_ context.Context, action func(s *api.DeploymentStatus) bool, force ...bool) error {
panic("implement me")
}
// SelectImage is not implemented by testContext; calling it panics.
func (c *testContext) SelectImage(spec api.DeploymentSpec, status api.DeploymentStatus) (api.ImageInfo, bool) {
panic("implement me")
}

View file

@ -53,9 +53,26 @@ type ServerGroupIterator interface {
ForeachServerGroup(cb api.ServerGroupFunc, status *api.DeploymentStatus) error
}
// DeploymentStatusUpdateFunc is the modifier callback accepted by
// WithStatusUpdate. It receives the deployment status and returns a bool.
type DeploymentStatusUpdateFunc func(s *api.DeploymentStatus) bool
// DeploymentStatusUpdate is the capability to modify the status of an
// ArangoDeployment through a modifier function.
type DeploymentStatusUpdate interface {
// WithStatusUpdate update status of ArangoDeployment with defined modifier. If action returns True action is taken
WithStatusUpdate(ctx context.Context, action DeploymentStatusUpdateFunc, force ...bool) error
}
// DeploymentAgencyMaintenance is the capability to read and change the agency
// maintenance mode of a deployment.
type DeploymentAgencyMaintenance interface {
// GetAgencyMaintenanceMode returns info if maintenance mode is enabled
GetAgencyMaintenanceMode(ctx context.Context) (bool, error)
// SetAgencyMaintenanceMode set maintenance mode info
SetAgencyMaintenanceMode(ctx context.Context, enabled bool) error
}
// Context provides all functions needed by the Resources service
// to perform its service.
type Context interface {
DeploymentStatusUpdate
DeploymentAgencyMaintenance
// GetAPIObject returns the deployment as k8s object.
GetAPIObject() k8sutil.APIObject
// GetServerGroupIterator returns the deployment as ServerGroupIterator.
@ -106,8 +123,6 @@ type Context interface {
GetDatabaseClient(ctx context.Context) (driver.Client, error)
// GetAgency returns a connection to the entire agency.
GetAgency(ctx context.Context) (agency.Agency, error)
// WithStatusUpdate update status of ArangoDeployment with defined modifier. If action returns True action is taken
WithStatusUpdate(ctx context.Context, action func(s *api.DeploymentStatus) bool, force ...bool) error
// GetBackup receives information about a backup resource
GetBackup(ctx context.Context, backup string) (*backupApi.ArangoBackup, error)
GetScope() scope.Scope