1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Feature] Upgrade improvements (#686)

This commit is contained in:
Adam Janikowski 2021-02-10 09:17:52 +01:00 committed by GitHub
parent 147ccdda48
commit cb1f17a264
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 400 additions and 66 deletions

View file

@ -2,6 +2,8 @@
## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A)
- Add support for spec.ClusterDomain to be able to use FQDN in ArangoDB cluster communication
- Add Version Check feature with extended Upgrade checks
- Fix Upgrade failures recovery
## [1.1.3](https://github.com/arangodb/kube-arangodb/tree/1.1.3) (2020-12-16)
- Add v2alpha1 API for ArangoDeployment and ArangoDeploymentReplication

View file

@ -54,28 +54,30 @@ covers individual newer features separately.
Feature-wise production readiness table:
| Feature | Operator Version | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks |
|-----------------------------------------|------------------|------------------|-----------------------|------------|---------|------------------------------------------|--------------------------------------------------------------------------|
| Pod Disruption Budgets | 0.3.10 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
| Pod Disruption Budgets | 0.3.11 | Any | Community, Enterprise | Production | True | N/A | N/A |
| Volume Resizing | 0.3.10 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
| Volume Resizing | 0.3.11 | Any | Community, Enterprise | Production | True | N/A | N/A |
| Disabling of liveness probes | 0.3.10 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
| Disabling of liveness probes | 0.3.11 | Any | Community, Enterprise | Production | True | N/A | N/A |
| Volume Claim Templates | 0.3.11 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
| Volume Claim Templates | 1.0.0 | Any | Community, Enterprise | Production | True | N/A | N/A |
| Prometheus Metrics Exporter | 0.3.11 | Any | Community, Enterprise | Alpha | True | N/A | Prometheus required |
| Prometheus Metrics Exporter | 1.0.0 | Any | Community, Enterprise | Production | True | N/A | Prometheus required |
| Sidecar Containers | 0.3.11 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
| Sidecar Containers | 1.0.0 | Any | Community, Enterprise | Production | True | N/A | N/A |
| Operator Single Mode | 1.0.4 | Any | Community, Enterprise | Production | False | --mode.single | Only 1 instance of Operator allowed in namespace when feature is enabled |
| TLS SNI Support | 1.0.3 | >= 3.7.0 | Enterprise | Production | True | --deployment.feature.tls-sni | N/A |
| TLS Runtime Rotation Support | 1.0.4 | > 3.7.0 | Enterprise | Alpha | False | --deployment.feature.tls-rotation | N/A |
| TLS Runtime Rotation Support | 1.1.0 | > 3.7.0 | Enterprise | Production | True | --deployment.feature.tls-rotation | N/A |
| JWT Rotation Support | 1.0.4 | > 3.7.0 | Enterprise | Alpha | False | --deployment.feature.jwt-rotation | N/A |
| JWT Rotation Support | 1.1.0 | > 3.7.0 | Enterprise | Production | True | --deployment.feature.jwt-rotation | N/A |
| Encryption Key Rotation Support | 1.0.4 | > 3.7.0 | Enterprise | Alpha | False | --deployment.feature.encryption-rotation | N/A |
| Operator Maintenance Management Support | 1.0.7 | >= 3.5.0 | Community, Enterprise | Alpha | False | --deployment.feature.maintenance | N/A |
| Feature | Operator Version | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks |
|-----------------------------------------|------------------|------------------|-----------------------|------------|---------|--------------------------------------------|--------------------------------------------------------------------------|
| Pod Disruption Budgets | 0.3.10 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
| Pod Disruption Budgets | 0.3.11 | Any | Community, Enterprise | Production | True | N/A | N/A |
| Volume Resizing | 0.3.10 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
| Volume Resizing | 0.3.11 | Any | Community, Enterprise | Production | True | N/A | N/A |
| Disabling of liveness probes | 0.3.10 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
| Disabling of liveness probes | 0.3.11 | Any | Community, Enterprise | Production | True | N/A | N/A |
| Volume Claim Templates | 0.3.11 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
| Volume Claim Templates | 1.0.0 | Any | Community, Enterprise | Production | True | N/A | N/A |
| Prometheus Metrics Exporter | 0.3.11 | Any | Community, Enterprise | Alpha | True | N/A | Prometheus required |
| Prometheus Metrics Exporter | 1.0.0 | Any | Community, Enterprise | Production | True | N/A | Prometheus required |
| Sidecar Containers | 0.3.11 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
| Sidecar Containers | 1.0.0 | Any | Community, Enterprise | Production | True | N/A | N/A |
| Operator Single Mode | 1.0.4 | Any | Community, Enterprise | Production | False | --mode.single | Only 1 instance of Operator allowed in namespace when feature is enabled |
| TLS SNI Support | 1.0.3 | >= 3.7.0 | Enterprise | Production | True | --deployment.feature.tls-sni | N/A |
| TLS Runtime Rotation Support | 1.0.4 | > 3.7.0 | Enterprise | Alpha | False | --deployment.feature.tls-rotation | N/A |
| TLS Runtime Rotation Support | 1.1.0 | > 3.7.0 | Enterprise | Production | True | --deployment.feature.tls-rotation | N/A |
| JWT Rotation Support | 1.0.4 | > 3.7.0 | Enterprise | Alpha | False | --deployment.feature.jwt-rotation | N/A |
| JWT Rotation Support | 1.1.0 | > 3.7.0 | Enterprise | Production | True | --deployment.feature.jwt-rotation | N/A |
| Encryption Key Rotation Support | 1.0.4 | > 3.7.0 | Enterprise | Alpha | False | --deployment.feature.encryption-rotation | N/A |
| Encryption Key Rotation Support | 1.1.0 | > 3.7.0 | Enterprise | Production | True | --deployment.feature.encryption-rotation | N/A |
| Version Check | 1.1.4 | >= 3.5.0 | Community, Enterprise | Alpha | False | --deployment.feature.upgrade-version-check | N/A |
| Operator Maintenance Management Support | 1.0.7 | >= 3.5.0 | Community, Enterprise | Alpha | False | --deployment.feature.maintenance | N/A |
## Release notes for 0.3.16

View file

@ -63,6 +63,8 @@ const (
ConditionTypeUpToDate ConditionType = "UpToDate"
// ConditionTypeMarkedToRemove indicates that the member is marked to be removed.
ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove"
// ConditionTypeUpgradeFailed indicates that the upgrade of the member has failed.
ConditionTypeUpgradeFailed ConditionType = "UpgradeFailed"
)
// Condition represents one current condition of a deployment or deployment member.

View file

@ -22,7 +22,11 @@
package v1
import driver "github.com/arangodb/go-driver"
import (
"fmt"
driver "github.com/arangodb/go-driver"
)
// ImageInfo contains an ID of an image and the ArangoDB version inside the image.
type ImageInfo struct {
@ -32,6 +36,20 @@ type ImageInfo struct {
Enterprise bool `json:"enterprise,omitempty"` // If set, this is an enterprise image
}
// String renders a human-readable description of the image in the form
// "ArangoDB <edition> <version> (<image>)", where the edition is either
// "Community" or "Enterprise". A nil receiver yields "undefined".
func (i *ImageInfo) String() string {
	if i == nil {
		return "undefined"
	}

	var edition string
	if i.Enterprise {
		edition = "Enterprise"
	} else {
		edition = "Community"
	}

	version := string(i.ArangoDBVersion)

	return fmt.Sprintf("ArangoDB %s %s (%s)", edition, version, i.Image)
}
// ImageInfoList is a list of image infos
type ImageInfoList []ImageInfo

View file

@ -69,6 +69,8 @@ type MemberStatus struct {
ImageID string `json:"image-id,omitempty"`
// Image holds image details
Image *ImageInfo `json:"image,omitempty"`
// OldImage holds old image details
OldImage *ImageInfo `json:"old-image,omitempty"`
// Upgrade define if upgrade should be enforced during next execution
Upgrade bool `json:"upgrade,omitempty"`
// Endpoint definition how member should be reachable
@ -89,6 +91,7 @@ func (s MemberStatus) Equal(other MemberStatus) bool {
s.ArangoVersion == other.ArangoVersion &&
s.ImageID == other.ImageID &&
s.Image.Equal(other.Image) &&
s.OldImage.Equal(other.OldImage) &&
s.Upgrade == other.Upgrade &&
util.CompareStringPointers(s.Endpoint, other.Endpoint)
}

View file

@ -31,14 +31,15 @@ import (
)
const (
ServerGroupReservedInitContainerNameLifecycle = "init-lifecycle"
ServerGroupReservedInitContainerNameUUID = "uuid"
ServerGroupReservedInitContainerNameUpgrade = "upgrade"
ServerGroupReservedInitContainerNameLifecycle = "init-lifecycle"
ServerGroupReservedInitContainerNameUUID = "uuid"
ServerGroupReservedInitContainerNameUpgrade = "upgrade"
ServerGroupReservedInitContainerNameVersionCheck = "version-check"
)
func IsReservedServerGroupInitContainerName(name string) bool {
switch name {
case ServerGroupReservedInitContainerNameLifecycle, ServerGroupReservedInitContainerNameUUID, ServerGroupReservedInitContainerNameUpgrade:
case ServerGroupReservedInitContainerNameLifecycle, ServerGroupReservedInitContainerNameUUID, ServerGroupReservedInitContainerNameUpgrade, ServerGroupReservedInitContainerNameVersionCheck:
return true
default:
return false

View file

@ -850,6 +850,11 @@ func (in *MemberStatus) DeepCopyInto(out *MemberStatus) {
*out = new(ImageInfo)
**out = **in
}
if in.OldImage != nil {
in, out := &in.OldImage, &out.OldImage
*out = new(ImageInfo)
**out = **in
}
if in.Endpoint != nil {
in, out := &in.Endpoint, &out.Endpoint
*out = new(string)

View file

@ -63,6 +63,8 @@ const (
ConditionTypeUpToDate ConditionType = "UpToDate"
// ConditionTypeMarkedToRemove indicates that the member is marked to be removed.
ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove"
// ConditionTypeUpgradeFailed indicates that the upgrade of the member has failed.
ConditionTypeUpgradeFailed ConditionType = "UpgradeFailed"
)
// Condition represents one current condition of a deployment or deployment member.

View file

@ -22,7 +22,11 @@
package v2alpha1
import driver "github.com/arangodb/go-driver"
import (
"fmt"
driver "github.com/arangodb/go-driver"
)
// ImageInfo contains an ID of an image and the ArangoDB version inside the image.
type ImageInfo struct {
@ -32,6 +36,20 @@ type ImageInfo struct {
Enterprise bool `json:"enterprise,omitempty"` // If set, this is an enterprise image
}
// String renders a human-readable description of the image in the form
// "ArangoDB <edition> <version> (<image>)", where the edition is either
// "Community" or "Enterprise". A nil receiver yields "undefined".
func (i *ImageInfo) String() string {
	if i == nil {
		return "undefined"
	}

	var edition string
	if i.Enterprise {
		edition = "Enterprise"
	} else {
		edition = "Community"
	}

	version := string(i.ArangoDBVersion)

	return fmt.Sprintf("ArangoDB %s %s (%s)", edition, version, i.Image)
}
// ImageInfoList is a list of image infos
type ImageInfoList []ImageInfo

View file

@ -69,6 +69,8 @@ type MemberStatus struct {
ImageID string `json:"image-id,omitempty"`
// Image holds image details
Image *ImageInfo `json:"image,omitempty"`
// OldImage holds old image details
OldImage *ImageInfo `json:"old-image,omitempty"`
// Upgrade define if upgrade should be enforced during next execution
Upgrade bool `json:"upgrade,omitempty"`
// Endpoint definition how member should be reachable
@ -89,6 +91,7 @@ func (s MemberStatus) Equal(other MemberStatus) bool {
s.ArangoVersion == other.ArangoVersion &&
s.ImageID == other.ImageID &&
s.Image.Equal(other.Image) &&
s.OldImage.Equal(other.OldImage) &&
s.Upgrade == other.Upgrade &&
util.CompareStringPointers(s.Endpoint, other.Endpoint)
}

View file

@ -31,14 +31,15 @@ import (
)
const (
ServerGroupReservedInitContainerNameLifecycle = "init-lifecycle"
ServerGroupReservedInitContainerNameUUID = "uuid"
ServerGroupReservedInitContainerNameUpgrade = "upgrade"
ServerGroupReservedInitContainerNameLifecycle = "init-lifecycle"
ServerGroupReservedInitContainerNameUUID = "uuid"
ServerGroupReservedInitContainerNameUpgrade = "upgrade"
ServerGroupReservedInitContainerNameVersionCheck = "version-check"
)
func IsReservedServerGroupInitContainerName(name string) bool {
switch name {
case ServerGroupReservedInitContainerNameLifecycle, ServerGroupReservedInitContainerNameUUID, ServerGroupReservedInitContainerNameUpgrade:
case ServerGroupReservedInitContainerNameLifecycle, ServerGroupReservedInitContainerNameUUID, ServerGroupReservedInitContainerNameUpgrade, ServerGroupReservedInitContainerNameVersionCheck:
return true
default:
return false

View file

@ -850,6 +850,11 @@ func (in *MemberStatus) DeepCopyInto(out *MemberStatus) {
*out = new(ImageInfo)
**out = **in
}
if in.OldImage != nil {
in, out := &in.OldImage, &out.OldImage
*out = new(ImageInfo)
**out = **in
}
if in.Endpoint != nil {
in, out := &in.Endpoint, &out.Endpoint
*out = new(string)

View file

@ -129,6 +129,7 @@ func (d *Deployment) inspectDeployment(lastInterval util.Interval) util.Interval
func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterval util.Interval, cachedStatus inspector.Inspector) (nextInterval util.Interval, inspectError error) {
t := time.Now()
defer func() {
d.deps.Log.Info().Msgf("Reconciliation loop took %s", time.Since(t))
}()

View file

@ -0,0 +1,39 @@
//
// DISCLAIMER
//
// Copyright 2020 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Adam Janikowski
//
package features
// init passes the upgrade-version-check feature definition to registerFeature
// when the package is initialised.
func init() {
registerFeature(upgradeVersionCheck)
}
// upgradeVersionCheck is the definition of the "upgrade-version-check" feature,
// described as enabling an initContainer with a pre version check.
// It does not require an Enterprise image and is disabled by default.
// NOTE(review): version is presumably the minimum supported ArangoDB version
// (the README feature table lists ">= 3.5.0") - confirm against the feature type.
var upgradeVersionCheck Feature = &feature{
name: "upgrade-version-check",
description: "Enable initContainer with pre version check",
version: "3.5.0",
enterpriseRequired: false,
enabledByDefault: false,
}
// UpgradeVersionCheck returns the package-level upgrade-version-check feature definition.
func UpgradeVersionCheck() Feature {
return upgradeVersionCheck
}

View file

@ -0,0 +1,63 @@
//
// DISCLAIMER
//
// Copyright 2020 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Adam Janikowski
//
package pod
import (
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/deployment/features"
"github.com/arangodb/kube-arangodb/pkg/deployment/resources/inspector"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
core "k8s.io/api/core/v1"
)
// UpgradeVersionCheck returns a Builder whose Args method contributes the
// version-check option; its Volumes, Envs and Verify methods contribute nothing.
func UpgradeVersionCheck() Builder {
return upgradeVersionCheck{}
}
// upgradeVersionCheck is the field-less Builder implementation returned by UpgradeVersionCheck.
type upgradeVersionCheck struct{}
// Args returns the arangod option "--database.check-version=true" when the
// upgrade-version-check feature is enabled and the input targets an agent,
// dbserver or single server group. In every other case it returns nil.
func (u upgradeVersionCheck) Args(i Input) k8sutil.OptionPairs {
	// Feature switched off: contribute no options at all.
	if !features.UpgradeVersionCheck().Enabled() {
		return nil
	}

	switch i.Group {
	case api.ServerGroupAgents, api.ServerGroupDBServers, api.ServerGroupSingle:
		checkVersion := k8sutil.OptionPair{
			Key:   "--database.check-version",
			Value: "true",
		}
		return k8sutil.NewOptionPair(checkVersion)
	default:
		// Other groups do not get the version check option.
		return nil
	}
}
// Volumes always returns nil for both the volumes and the volume mounts.
func (u upgradeVersionCheck) Volumes(i Input) ([]core.Volume, []core.VolumeMount) {
return nil, nil
}
// Envs always returns nil; this builder adds no environment variables.
func (u upgradeVersionCheck) Envs(i Input) []core.EnvVar {
return nil
}
// Verify always returns nil; neither the input nor the cached status is inspected.
func (u upgradeVersionCheck) Verify(i Input, cachedStatus inspector.Inspector) error {
return nil
}

View file

@ -80,6 +80,9 @@ func (a *setCurrentMemberImageAction) CheckProgress(ctx context.Context) (bool,
return false
}
if !m.Image.Equal(&imageInfo) {
m.OldImage = m.Image.DeepCopy()
}
m.Image = &imageInfo
if err := s.Members.Update(m, g); err != nil {

View file

@ -110,7 +110,32 @@ func (a *actionUpgradeMember) CheckProgress(ctx context.Context) (bool, bool, er
log.Error().Msg("No such member")
return true, false, nil
}
isUpgrading := m.Phase == api.MemberPhaseUpgrading
if isUpgrading {
if m.Conditions.IsTrue(api.ConditionTypeTerminated) {
if m.Conditions.IsTrue(api.ConditionTypeUpgradeFailed) {
a.log.Error().Msgf("Upgrade of member failed")
}
// Invalidate plan
m.Phase = ""
m.Conditions.Remove(api.ConditionTypeTerminated)
m.Conditions.Remove(api.ConditionTypeUpgradeFailed)
if m.OldImage != nil {
m.Image = m.OldImage.DeepCopy()
}
if err := a.actionCtx.UpdateMember(m); err != nil {
return false, true, nil
}
log.Error().Msgf("Upgrade failed")
return false, true, nil
}
}
log = log.With().
Str("pod-name", m.PodName).
Bool("is-upgrading", isUpgrading).Logger()
@ -128,6 +153,9 @@ func (a *actionUpgradeMember) CheckProgress(ctx context.Context) (bool, bool, er
m.Phase = api.MemberPhaseCreated
m.RecentTerminations = nil // Since we're upgrading, we do not care about old terminations.
m.CleanoutJobID = ""
if !m.OldImage.Equal(m.Image) && isUpgrading {
m.OldImage = m.Image.DeepCopy()
}
if err := a.actionCtx.UpdateMember(m); err != nil {
return false, false, errors.WithStack(err)
}

View file

@ -24,6 +24,7 @@ package reconcile
import (
"context"
"time"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
@ -139,7 +140,10 @@ func (a *actionWaitForMemberUp) checkProgressAgent(ctx context.Context) (bool, b
a.Endpoints()
}
if err := agency.AreAgentsHealthy(ctx, clients); err != nil {
shortCtx, c := context.WithTimeout(ctx, 3*time.Second)
defer c()
if err := agency.AreAgentsHealthy(shortCtx, clients); err != nil {
log.Debug().Err(err).Msg("Not all agents are ready")
return false, false, nil
}

View file

@ -113,7 +113,6 @@ func createPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIOb
currentPlan api.Plan, spec api.DeploymentSpec,
status api.DeploymentStatus, cachedStatus inspector.Inspector,
builderCtx PlanBuilderContext) (api.Plan, bool) {
if !currentPlan.IsEmpty() {
// Plan already exists, complete that first
return currentPlan, false
@ -313,6 +312,11 @@ type planBuilder func(ctx context.Context,
spec api.DeploymentSpec, status api.DeploymentStatus,
cachedStatus inspector.Inspector, context PlanBuilderContext) api.Plan
// planBuilderCondition is a predicate over the reconciliation inputs
// (logger, API object, spec, status, cached status and builder context).
// ApplyWithCondition only runs its plan builder when the predicate returns true.
type planBuilderCondition func(ctx context.Context,
log zerolog.Logger, apiObject k8sutil.APIObject,
spec api.DeploymentSpec, status api.DeploymentStatus,
cachedStatus inspector.Inspector, context PlanBuilderContext) bool
type planBuilderSubPlan func(ctx context.Context,
log zerolog.Logger, apiObject k8sutil.APIObject,
spec api.DeploymentSpec, status api.DeploymentStatus,
@ -335,6 +339,7 @@ func NewWithPlanBuilder(ctx context.Context,
// WithPlanBuilder groups the ways a plan builder can be invoked to produce an api.Plan.
type WithPlanBuilder interface {
// Apply returns the plan produced for plan builder p.
// NOTE(review): presumably p is called with the inputs held by the implementation - confirm.
Apply(p planBuilder) api.Plan
// ApplyWithCondition returns an empty plan when condition c is false and
// otherwise behaves like Apply(p).
ApplyWithCondition(c planBuilderCondition, p planBuilder) api.Plan
// ApplySubPlan hands the given plan builders to sub-plan builder p and returns its plan.
ApplySubPlan(p planBuilderSubPlan, plans ...planBuilder) api.Plan
}
@ -348,6 +353,14 @@ type withPlanBuilder struct {
context PlanBuilderContext
}
// ApplyWithCondition evaluates condition c against the inputs stored in w.
// When the condition holds, the result of w.Apply(p) is returned; otherwise
// an empty plan is returned and p is never invoked.
func (w withPlanBuilder) ApplyWithCondition(c planBuilderCondition, p planBuilder) api.Plan {
	conditionMet := c(w.ctx, w.log, w.apiObject, w.spec, w.status, w.cachedStatus, w.context)
	if conditionMet {
		return w.Apply(p)
	}

	return api.Plan{}
}
// ApplySubPlan calls sub-plan builder p with the inputs stored in w, w itself
// as the WithPlanBuilder argument and the provided plan builders, and returns
// whatever plan p produces.
func (w withPlanBuilder) ApplySubPlan(p planBuilderSubPlan, plans ...planBuilder) api.Plan {
return p(w.ctx, w.log, w.apiObject, w.spec, w.status, w.cachedStatus, w.context, w, plans...)
}

View file

@ -26,6 +26,7 @@ import (
"context"
"fmt"
"sort"
"time"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
@ -220,12 +221,16 @@ func createJWTStatusUpdateRequired(ctx context.Context,
func areJWTTokensUpToDate(ctx context.Context,
log zerolog.Logger, apiObject k8sutil.APIObject,
spec api.DeploymentSpec, status api.DeploymentStatus,
cachedStatus inspector.Inspector, context PlanBuilderContext,
cachedStatus inspector.Inspector, planCtx PlanBuilderContext,
folder *core.Secret) (plan api.Plan, failed bool) {
gCtx, c := context.WithTimeout(ctx, 2*time.Second)
defer c()
status.Members.ForeachServerGroup(func(group api.ServerGroup, list api.MemberStatusList) error {
for _, m := range list {
if updateRequired, failedMember := isJWTTokenUpToDate(ctx, log, apiObject, spec, status, cachedStatus, context, group, m, folder); failedMember {
nCtx, c := context.WithTimeout(gCtx, 500*time.Millisecond)
defer c()
if updateRequired, failedMember := isJWTTokenUpToDate(nCtx, log, apiObject, spec, status, cachedStatus, planCtx, group, m, folder); failedMember {
failed = true
continue
} else if updateRequired {

View file

@ -290,7 +290,7 @@ func createCACleanPlan(ctx context.Context,
func createKeyfileRenewalPlanDefault(ctx context.Context,
log zerolog.Logger, apiObject k8sutil.APIObject,
spec api.DeploymentSpec, status api.DeploymentStatus,
cachedStatus inspector.Inspector, context PlanBuilderContext) api.Plan {
cachedStatus inspector.Inspector, planCtx PlanBuilderContext) api.Plan {
if !spec.TLS.IsSecure() {
return nil
}
@ -306,7 +306,11 @@ func createKeyfileRenewalPlanDefault(ctx context.Context,
if !plan.IsEmpty() {
return nil
}
if renew, recreate := keyfileRenewalRequired(ctx, log, apiObject, spec, status, cachedStatus, context, group, member, api.TLSRotateModeRecreate); renew {
lCtx, c := context.WithTimeout(ctx, 500*time.Millisecond)
defer c()
if renew, recreate := keyfileRenewalRequired(lCtx, log, apiObject, spec, status, cachedStatus, planCtx, group, member, api.TLSRotateModeRecreate); renew {
log.Info().Msg("Renewal of keyfile required - Recreate")
if recreate {
plan = append(plan, api.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, group, member.ID, "Remove server keyfile and enforce renewal"))
@ -324,7 +328,7 @@ func createKeyfileRenewalPlanDefault(ctx context.Context,
func createKeyfileRenewalPlanInPlace(ctx context.Context,
log zerolog.Logger, apiObject k8sutil.APIObject,
spec api.DeploymentSpec, status api.DeploymentStatus,
cachedStatus inspector.Inspector, context PlanBuilderContext) api.Plan {
cachedStatus inspector.Inspector, planCtx PlanBuilderContext) api.Plan {
if !spec.TLS.IsSecure() {
return nil
}
@ -337,7 +341,10 @@ func createKeyfileRenewalPlanInPlace(ctx context.Context,
}
for _, member := range members {
if renew, recreate := keyfileRenewalRequired(ctx, log, apiObject, spec, status, cachedStatus, context, group, member, api.TLSRotateModeInPlace); renew {
lCtx, c := context.WithTimeout(ctx, 500*time.Millisecond)
defer c()
if renew, recreate := keyfileRenewalRequired(lCtx, log, apiObject, spec, status, cachedStatus, planCtx, group, member, api.TLSRotateModeInPlace); renew {
log.Info().Msg("Renewal of keyfile required - InPlace")
if recreate {
plan = append(plan, api.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, group, member.ID, "Remove server keyfile and enforce renewal"))
@ -355,16 +362,19 @@ func createKeyfileRenewalPlanInPlace(ctx context.Context,
func createKeyfileRenewalPlan(ctx context.Context,
log zerolog.Logger, apiObject k8sutil.APIObject,
spec api.DeploymentSpec, status api.DeploymentStatus,
cachedStatus inspector.Inspector, context PlanBuilderContext) api.Plan {
cachedStatus inspector.Inspector, planCtx PlanBuilderContext) api.Plan {
if !spec.TLS.IsSecure() {
return nil
}
gCtx, c := context.WithTimeout(ctx, 2*time.Second)
defer c()
switch createKeyfileRenewalPlanMode(spec, status) {
case api.TLSRotateModeInPlace:
return createKeyfileRenewalPlanInPlace(ctx, log, apiObject, spec, status, cachedStatus, context)
return createKeyfileRenewalPlanInPlace(gCtx, log, apiObject, spec, status, cachedStatus, planCtx)
default:
return createKeyfileRenewalPlanDefault(ctx, log, apiObject, spec, status, cachedStatus, context)
return createKeyfileRenewalPlanDefault(gCtx, log, apiObject, spec, status, cachedStatus, planCtx)
}
}
@ -420,6 +430,8 @@ func checkServerValidCertRequest(ctx context.Context, context PlanBuilderContext
return nil, err
}
req = req.WithContext(ctx)
if auth != nil && auth.Type() == driver.AuthenticationTypeRaw {
if h := auth.Get("value"); h != "" {
req.Header.Add("Authorization", h)

View file

@ -61,7 +61,7 @@ func versionHasAdvertisedEndpoint(v driver.Version) bool {
return v.CompareTo("3.4.0") >= 0
}
// createArangodArgs creates command line arguments for an arangod server in the given group.
// createArangodArgsWithUpgrade creates command line arguments for an arangod server upgrade in the given group.
//
// The options produced by pod.AutoUpgrade() for the input come first, followed
// by any caller-supplied additionalOptions; the combined list is handed to
// createArangodArgs. Previously additionalOptions was accepted but silently
// dropped, so callers passing extra options never saw them applied.
func createArangodArgsWithUpgrade(input pod.Input, additionalOptions ...k8sutil.OptionPair) []string {
	upgradeOptions := pod.AutoUpgrade().Args(input)

	// Build a fresh slice so neither the upgrade options nor the caller's
	// slice is modified by the append below.
	options := make([]k8sutil.OptionPair, 0, len(upgradeOptions)+len(additionalOptions))
	options = append(options, upgradeOptions...)
	options = append(options, additionalOptions...)

	return createArangodArgs(input, options...)
}
@ -484,8 +484,8 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, memberID string,
m.PodUID = uid
m.PodSpecVersion = sha
m.Endpoint = util.NewString(k8sutil.CreatePodDNSNameWithDomain(apiObject, spec.ClusterDomain, role, m.ID))
m.ArangoVersion = status.CurrentImage.ArangoDBVersion
m.ImageID = status.CurrentImage.ImageID
m.ArangoVersion = m.Image.ArangoDBVersion
m.ImageID = m.Image.ImageID
// Check for missing side cars in
m.SideCarSpecs = make(map[string]core.Container)
@ -543,6 +543,8 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, memberID string,
m.Conditions.Remove(api.ConditionTypeTerminating)
m.Conditions.Remove(api.ConditionTypeAgentRecoveryNeeded)
m.Conditions.Remove(api.ConditionTypeAutoUpgrade)
m.Conditions.Remove(api.ConditionTypeUpgradeFailed)
m.Upgrade = false
if err := status.Members.Update(m, group); err != nil {
return errors.WithStack(err)
}

View file

@ -426,7 +426,7 @@ func (m *MemberArangoDPod) GetInitContainers() ([]core.Container, error) {
{
// Upgrade container - run in background
if m.autoUpgrade {
if m.autoUpgrade || m.status.Upgrade {
args := createArangodArgsWithUpgrade(m.AsInput())
c, err := k8sutil.NewContainer(args, m.GetContainerCreator())
@ -443,6 +443,28 @@ func (m *MemberArangoDPod) GetInitContainers() ([]core.Container, error) {
initContainers = append(initContainers, c)
}
// VersionCheck Container
{
versionArgs := pod.UpgradeVersionCheck().Args(m.AsInput())
if len(versionArgs) > 0 {
args := createArangodArgs(m.AsInput(), versionArgs...)
c, err := k8sutil.NewContainer(args, m.GetContainerCreator())
if err != nil {
return nil, err
}
_, c.VolumeMounts = m.GetVolumes()
c.Name = api.ServerGroupReservedInitContainerNameVersionCheck
c.Lifecycle = nil
c.LivenessProbe = nil
c.ReadinessProbe = nil
initContainers = append(initContainers, c)
}
}
}
return initContainers, nil

View file

@ -113,20 +113,64 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspector.Insp
// Pod has terminated with at least 1 container with a non-zero exit code.
wasTerminated := memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated)
if memberStatus.Conditions.Update(api.ConditionTypeTerminated, true, "Pod Failed", "") {
if c, ok := k8sutil.GetContainerStatusByName(pod, k8sutil.ServerContainerName); ok {
if t := c.State.Terminated; t != nil {
log.Warn().Str("member", memberStatus.ID).
Str("pod", pod.GetName()).
Str("uid", string(pod.GetUID())).
Int32("exit-code", t.ExitCode).
Str("reason", t.Reason).
Str("message", t.Message).
Int32("signal", t.Signal).
Time("started", t.StartedAt.Time).
Time("finished", t.FinishedAt.Time).
Msgf("Pod failed in unexpected way")
if containers := k8sutil.GetFailedContainerNames(pod.Status.InitContainerStatuses); len(containers) > 0 {
for _, container := range containers {
switch container {
case api.ServerGroupReservedInitContainerNameVersionCheck:
if c, ok := k8sutil.GetAnyContainerStatusByName(pod.Status.InitContainerStatuses, container); ok {
if t := c.State.Terminated; t != nil {
if t := c.State.Terminated; t != nil && t.ExitCode == 11 {
memberStatus.Upgrade = true
updateMemberStatusNeeded = true
}
}
}
case api.ServerGroupReservedInitContainerNameUpgrade:
memberStatus.Conditions.Update(api.ConditionTypeUpgradeFailed, true, "Upgrade Failed", "")
}
if c, ok := k8sutil.GetAnyContainerStatusByName(pod.Status.InitContainerStatuses, container); ok {
if t := c.State.Terminated; t != nil {
if t := c.State.Terminated; t != nil && t.ExitCode != 0 {
log.Warn().Str("member", memberStatus.ID).
Str("pod", pod.GetName()).
Str("container", container).
Str("uid", string(pod.GetUID())).
Int32("exit-code", t.ExitCode).
Str("reason", t.Reason).
Str("message", t.Message).
Int32("signal", t.Signal).
Time("started", t.StartedAt.Time).
Time("finished", t.FinishedAt.Time).
Msgf("Pod failed in unexpected way: Init Container failed")
}
}
}
}
}
if containers := k8sutil.GetFailedContainerNames(pod.Status.ContainerStatuses); len(containers) > 0 {
for _, container := range containers {
if c, ok := k8sutil.GetAnyContainerStatusByName(pod.Status.ContainerStatuses, container); ok {
if t := c.State.Terminated; t != nil {
if t := c.State.Terminated; t != nil && t.ExitCode != 0 {
log.Warn().Str("member", memberStatus.ID).
Str("pod", pod.GetName()).
Str("container", container).
Str("uid", string(pod.GetUID())).
Int32("exit-code", t.ExitCode).
Str("reason", t.Reason).
Str("message", t.Message).
Int32("signal", t.Signal).
Time("started", t.StartedAt.Time).
Time("finished", t.FinishedAt.Time).
Msgf("Pod failed in unexpected way: Core Container failed")
}
}
}
}
}
log.Debug().Str("pod-name", pod.GetName()).Msg("Updating member condition Terminated to true: Pod Failed")
updateMemberStatusNeeded = true
nextInterval = nextInterval.ReduceTo(recheckSoonPodInspectorInterval)

View file

@ -22,33 +22,57 @@
package k8sutil
import v1 "k8s.io/api/core/v1"
import core "k8s.io/api/core/v1"
// GetContainerByName returns the container in the given pod with the given name.
// Returns false if not found.
func GetContainerByName(p *v1.Pod, name string) (v1.Container, bool) {
func GetContainerByName(p *core.Pod, name string) (core.Container, bool) {
for _, c := range p.Spec.Containers {
if c.Name == name {
return c, true
}
}
return v1.Container{}, false
return core.Container{}, false
}
// GetContainerStatusByName returns the container status in the given pod with the given name.
// Returns false if not found.
func GetContainerStatusByName(p *v1.Pod, name string) (v1.ContainerStatus, bool) {
func GetContainerStatusByName(p *core.Pod, name string) (core.ContainerStatus, bool) {
for _, c := range p.Status.ContainerStatuses {
if c.Name == name {
return c, true
}
}
return v1.ContainerStatus{}, false
return core.ContainerStatus{}, false
}
// GetAnyContainerStatusByName looks up the first entry in the given list of
// container statuses whose name equals name.
// The boolean result is false (with a zero-value status) if no entry matches.
func GetAnyContainerStatusByName(containers []core.ContainerStatus, name string) (core.ContainerStatus, bool) {
	for idx := range containers {
		if status := containers[idx]; status.Name == name {
			return status, true
		}
	}

	return core.ContainerStatus{}, false
}
// GetFailedContainerNames collects, in list order, the names of all container
// statuses for which IsContainerFailed reports true.
// The result is nil when no container has failed.
func GetFailedContainerNames(containers []core.ContainerStatus) []string {
	var failed []string

	for idx := range containers {
		status := &containers[idx]
		if !IsContainerFailed(status) {
			continue
		}
		failed = append(failed, status.Name)
	}

	return failed
}
// IsResourceRequirementsChanged returns true if the resource requirements have changed.
func IsResourceRequirementsChanged(wanted, given v1.ResourceRequirements) bool {
checkList := func(wanted, given v1.ResourceList) bool {
func IsResourceRequirementsChanged(wanted, given core.ResourceRequirements) bool {
checkList := func(wanted, given core.ResourceList) bool {
for k, v := range wanted {
if gv, ok := given[k]; !ok {
return true

View file

@ -121,6 +121,18 @@ func IsPodFailed(pod *core.Pod) bool {
}
}
// IsContainerFailed returns true if the given container status reports
// a terminated state with a non-zero exit code.
func IsContainerFailed(container *core.ContainerStatus) bool {
	terminated := container.State.Terminated
	return terminated != nil && terminated.ExitCode != 0
}
// IsPodScheduled returns true if the pod has been scheduled.
func IsPodScheduled(pod *core.Pod) bool {
condition := getPodCondition(&pod.Status, core.PodScheduled)