[Feature] Upgrade improvements (#686)

parent 147ccdda48 → commit cb1f17a264
26 changed files with 400 additions and 66 deletions
CHANGELOG.md

@@ -2,6 +2,8 @@
 ## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A)
 - Add support for spec.ClusterDomain to be able to use FQDN in ArangoDB cluster communication
+- Add Version Check feature with extended Upgrade checks
+- Fix Upgrade failures recovery
 
 ## [1.1.3](https://github.com/arangodb/kube-arangodb/tree/1.1.3) (2020-12-16)
 - Add v2alpha1 API for ArangoDeployment and ArangoDeploymentReplication
46 README.md

@@ -54,28 +54,30 @@ covers individual newer features separately.
 
 Feature-wise production readiness table:
 
 | Feature | Operator Version | ArangoDB Version | ArangoDB Edition | State | Enabled | Flag | Remarks |
 |---------|------------------|------------------|------------------|-------|---------|------|---------|
 | Pod Disruption Budgets | 0.3.10 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
 | Pod Disruption Budgets | 0.3.11 | Any | Community, Enterprise | Production | True | N/A | N/A |
 | Volume Resizing | 0.3.10 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
 | Volume Resizing | 0.3.11 | Any | Community, Enterprise | Production | True | N/A | N/A |
 | Disabling of liveness probes | 0.3.10 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
 | Disabling of liveness probes | 0.3.11 | Any | Community, Enterprise | Production | True | N/A | N/A |
 | Volume Claim Templates | 0.3.11 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
 | Volume Claim Templates | 1.0.0 | Any | Community, Enterprise | Production | True | N/A | N/A |
 | Prometheus Metrics Exporter | 0.3.11 | Any | Community, Enterprise | Alpha | True | N/A | Prometheus required |
 | Prometheus Metrics Exporter | 1.0.0 | Any | Community, Enterprise | Production | True | N/A | Prometheus required |
 | Sidecar Containers | 0.3.11 | Any | Community, Enterprise | Alpha | True | N/A | N/A |
 | Sidecar Containers | 1.0.0 | Any | Community, Enterprise | Production | True | N/A | N/A |
 | Operator Single Mode | 1.0.4 | Any | Community, Enterprise | Production | False | --mode.single | Only 1 instance of Operator allowed in namespace when feature is enabled |
 | TLS SNI Support | 1.0.3 | >= 3.7.0 | Enterprise | Production | True | --deployment.feature.tls-sni | N/A |
 | TLS Runtime Rotation Support | 1.0.4 | > 3.7.0 | Enterprise | Alpha | False | --deployment.feature.tls-rotation | N/A |
 | TLS Runtime Rotation Support | 1.1.0 | > 3.7.0 | Enterprise | Production | True | --deployment.feature.tls-rotation | N/A |
 | JWT Rotation Support | 1.0.4 | > 3.7.0 | Enterprise | Alpha | False | --deployment.feature.jwt-rotation | N/A |
 | JWT Rotation Support | 1.1.0 | > 3.7.0 | Enterprise | Production | True | --deployment.feature.jwt-rotation | N/A |
 | Encryption Key Rotation Support | 1.0.4 | > 3.7.0 | Enterprise | Alpha | False | --deployment.feature.encryption-rotation | N/A |
+| Encryption Key Rotation Support | 1.1.0 | > 3.7.0 | Enterprise | Production | True | --deployment.feature.encryption-rotation | N/A |
+| Version Check | 1.1.4 | >= 3.5.0 | Community, Enterprise | Alpha | False | --deployment.feature.upgrade-version-check | N/A |
 | Operator Maintenance Management Support | 1.0.7 | >= 3.5.0 | Community, Enterprise | Alpha | False | --deployment.feature.maintenance | N/A |
 
 ## Release notes for 0.3.16
@@ -63,6 +63,8 @@ const (
     ConditionTypeUpToDate ConditionType = "UpToDate"
     // ConditionTypeMarkedToRemove indicates that the member is marked to be removed.
     ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove"
+    // ConditionTypeUpgradeFailed indicates that the upgrade of the member failed.
+    ConditionTypeUpgradeFailed ConditionType = "UpgradeFailed"
 )
 
 // Condition represents one current condition of a deployment or deployment member.
@@ -22,7 +22,11 @@
 
 package v1
 
-import driver "github.com/arangodb/go-driver"
+import (
+    "fmt"
+
+    driver "github.com/arangodb/go-driver"
+)
 
 // ImageInfo contains an ID of an image and the ArangoDB version inside the image.
 type ImageInfo struct {
@@ -32,6 +36,20 @@ type ImageInfo struct {
     Enterprise bool `json:"enterprise,omitempty"` // If set, this is an enterprise image
 }
 
+func (i *ImageInfo) String() string {
+    if i == nil {
+        return "undefined"
+    }
+
+    e := "Community"
+
+    if i.Enterprise {
+        e = "Enterprise"
+    }
+
+    return fmt.Sprintf("ArangoDB %s %s (%s)", e, string(i.ArangoDBVersion), i.Image)
+}
+
 // ImageInfoList is a list of image infos
 type ImageInfoList []ImageInfo
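For illustration, the new stringer in use (values are hypothetical; fmt picks up the String method automatically for *ImageInfo):

package main

import (
    "fmt"

    v1 "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
)

func main() {
    info := &v1.ImageInfo{
        Image:           "arangodb/enterprise:3.7.9", // hypothetical image reference
        ArangoDBVersion: "3.7.9",
        Enterprise:      true,
    }
    fmt.Println(info) // ArangoDB Enterprise 3.7.9 (arangodb/enterprise:3.7.9)

    var unknown *v1.ImageInfo
    fmt.Println(unknown.String()) // undefined (the nil receiver is handled explicitly)
}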
@@ -69,6 +69,8 @@ type MemberStatus struct {
     ImageID string `json:"image-id,omitempty"`
     // Image holds image details
     Image *ImageInfo `json:"image,omitempty"`
+    // OldImage holds old image details
+    OldImage *ImageInfo `json:"old-image,omitempty"`
+    // Upgrade defines whether an upgrade should be enforced during the next execution
+    Upgrade bool `json:"upgrade,omitempty"`
     // Endpoint defines how the member should be reachable
@@ -89,6 +91,7 @@ func (s MemberStatus) Equal(other MemberStatus) bool {
         s.ArangoVersion == other.ArangoVersion &&
         s.ImageID == other.ImageID &&
         s.Image.Equal(other.Image) &&
+        s.OldImage.Equal(other.OldImage) &&
+        s.Upgrade == other.Upgrade &&
         util.CompareStringPointers(s.Endpoint, other.Endpoint)
 }
@@ -31,14 +31,15 @@ import (
 )
 
 const (
-    ServerGroupReservedInitContainerNameLifecycle = "init-lifecycle"
-    ServerGroupReservedInitContainerNameUUID      = "uuid"
-    ServerGroupReservedInitContainerNameUpgrade   = "upgrade"
+    ServerGroupReservedInitContainerNameLifecycle    = "init-lifecycle"
+    ServerGroupReservedInitContainerNameUUID         = "uuid"
+    ServerGroupReservedInitContainerNameUpgrade      = "upgrade"
+    ServerGroupReservedInitContainerNameVersionCheck = "version-check"
 )
 
 func IsReservedServerGroupInitContainerName(name string) bool {
     switch name {
-    case ServerGroupReservedInitContainerNameLifecycle, ServerGroupReservedInitContainerNameUUID, ServerGroupReservedInitContainerNameUpgrade:
+    case ServerGroupReservedInitContainerNameLifecycle, ServerGroupReservedInitContainerNameUUID, ServerGroupReservedInitContainerNameUpgrade, ServerGroupReservedInitContainerNameVersionCheck:
         return true
     default:
         return false
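A sketch of the widened check from a caller's perspective (the import path is assumed to be the v1 API package these constants are declared in):

package main

import (
    "fmt"

    api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
)

func main() {
    // "version-check" now collides with the operator's reserved names,
    // so user-supplied init containers may no longer use it.
    fmt.Println(api.IsReservedServerGroupInitContainerName("version-check")) // true
    fmt.Println(api.IsReservedServerGroupInitContainerName("my-init"))       // false
}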
5 pkg/apis/deployment/v1/zz_generated.deepcopy.go (generated)

@@ -850,6 +850,11 @@ func (in *MemberStatus) DeepCopyInto(out *MemberStatus) {
         *out = new(ImageInfo)
         **out = **in
     }
+    if in.OldImage != nil {
+        in, out := &in.OldImage, &out.OldImage
+        *out = new(ImageInfo)
+        **out = **in
+    }
     if in.Endpoint != nil {
         in, out := &in.Endpoint, &out.Endpoint
         *out = new(string)
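The new branch matters because OldImage is a pointer: without the extra allocation, a deep copy and its source would share one ImageInfo. A minimal sketch with hypothetical values:

package main

import (
    "fmt"

    v1 "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
)

func main() {
    orig := v1.MemberStatus{OldImage: &v1.ImageInfo{Image: "arangodb/arangodb:3.6.12"}}
    dup := *orig.DeepCopy()

    dup.OldImage.Image = "arangodb/arangodb:3.7.9"

    // The generated DeepCopyInto allocated a fresh ImageInfo, so the
    // original is unaffected by the mutation above.
    fmt.Println(orig.OldImage.Image) // arangodb/arangodb:3.6.12
}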
@@ -63,6 +63,8 @@ const (
     ConditionTypeUpToDate ConditionType = "UpToDate"
     // ConditionTypeMarkedToRemove indicates that the member is marked to be removed.
     ConditionTypeMarkedToRemove ConditionType = "MarkedToRemove"
+    // ConditionTypeUpgradeFailed indicates that the upgrade of the member failed.
+    ConditionTypeUpgradeFailed ConditionType = "UpgradeFailed"
 )
 
 // Condition represents one current condition of a deployment or deployment member.
@@ -22,7 +22,11 @@
 
 package v2alpha1
 
-import driver "github.com/arangodb/go-driver"
+import (
+    "fmt"
+
+    driver "github.com/arangodb/go-driver"
+)
 
 // ImageInfo contains an ID of an image and the ArangoDB version inside the image.
 type ImageInfo struct {
@@ -32,6 +36,20 @@ type ImageInfo struct {
     Enterprise bool `json:"enterprise,omitempty"` // If set, this is an enterprise image
 }
 
+func (i *ImageInfo) String() string {
+    if i == nil {
+        return "undefined"
+    }
+
+    e := "Community"
+
+    if i.Enterprise {
+        e = "Enterprise"
+    }
+
+    return fmt.Sprintf("ArangoDB %s %s (%s)", e, string(i.ArangoDBVersion), i.Image)
+}
+
 // ImageInfoList is a list of image infos
 type ImageInfoList []ImageInfo
@@ -69,6 +69,8 @@ type MemberStatus struct {
     ImageID string `json:"image-id,omitempty"`
     // Image holds image details
     Image *ImageInfo `json:"image,omitempty"`
+    // OldImage holds old image details
+    OldImage *ImageInfo `json:"old-image,omitempty"`
+    // Upgrade defines whether an upgrade should be enforced during the next execution
+    Upgrade bool `json:"upgrade,omitempty"`
     // Endpoint defines how the member should be reachable
@@ -89,6 +91,7 @@ func (s MemberStatus) Equal(other MemberStatus) bool {
         s.ArangoVersion == other.ArangoVersion &&
         s.ImageID == other.ImageID &&
         s.Image.Equal(other.Image) &&
+        s.OldImage.Equal(other.OldImage) &&
+        s.Upgrade == other.Upgrade &&
         util.CompareStringPointers(s.Endpoint, other.Endpoint)
 }
@@ -31,14 +31,15 @@ import (
 )
 
 const (
-    ServerGroupReservedInitContainerNameLifecycle = "init-lifecycle"
-    ServerGroupReservedInitContainerNameUUID      = "uuid"
-    ServerGroupReservedInitContainerNameUpgrade   = "upgrade"
+    ServerGroupReservedInitContainerNameLifecycle    = "init-lifecycle"
+    ServerGroupReservedInitContainerNameUUID         = "uuid"
+    ServerGroupReservedInitContainerNameUpgrade      = "upgrade"
+    ServerGroupReservedInitContainerNameVersionCheck = "version-check"
 )
 
 func IsReservedServerGroupInitContainerName(name string) bool {
     switch name {
-    case ServerGroupReservedInitContainerNameLifecycle, ServerGroupReservedInitContainerNameUUID, ServerGroupReservedInitContainerNameUpgrade:
+    case ServerGroupReservedInitContainerNameLifecycle, ServerGroupReservedInitContainerNameUUID, ServerGroupReservedInitContainerNameUpgrade, ServerGroupReservedInitContainerNameVersionCheck:
         return true
     default:
         return false
@@ -850,6 +850,11 @@ func (in *MemberStatus) DeepCopyInto(out *MemberStatus) {
         *out = new(ImageInfo)
         **out = **in
     }
+    if in.OldImage != nil {
+        in, out := &in.OldImage, &out.OldImage
+        *out = new(ImageInfo)
+        **out = **in
+    }
     if in.Endpoint != nil {
         in, out := &in.Endpoint, &out.Endpoint
         *out = new(string)
@@ -129,6 +129,7 @@ func (d *Deployment) inspectDeployment(lastInterval util.Interval) util.Interval
 
 func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterval util.Interval, cachedStatus inspector.Inspector) (nextInterval util.Interval, inspectError error) {
+    t := time.Now()
+
+    defer func() {
+        d.deps.Log.Info().Msgf("Reconciliation loop took %s", time.Since(t))
+    }()
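The pattern in isolation: record the start time on entry and log the elapsed duration in a defer, so every return path of the reconciliation loop is measured. A self-contained sketch (the logger wiring is hypothetical):

package main

import (
    "os"
    "time"

    "github.com/rs/zerolog"
)

func reconcileOnce(log zerolog.Logger) {
    t := time.Now()
    defer func() {
        // Runs on every exit path, so slow loops are always reported.
        log.Info().Msgf("Reconciliation loop took %s", time.Since(t))
    }()

    time.Sleep(50 * time.Millisecond) // stand-in for the real reconciliation work
}

func main() {
    reconcileOnce(zerolog.New(os.Stdout).With().Timestamp().Logger())
}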
39 pkg/deployment/features/upgrade.go (new file)

@@ -0,0 +1,39 @@
//
// DISCLAIMER
//
// Copyright 2020 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Adam Janikowski
//

package features

func init() {
    registerFeature(upgradeVersionCheck)
}

var upgradeVersionCheck Feature = &feature{
    name:               "upgrade-version-check",
    description:        "Enable initContainer with pre version check",
    version:            "3.5.0",
    enterpriseRequired: false,
    enabledByDefault:   false,
}

func UpgradeVersionCheck() Feature {
    return upgradeVersionCheck
}
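Per the README table above, this feature maps to the --deployment.feature.upgrade-version-check operator flag and stays disabled by default. Callers consult it through the accessor; a minimal sketch of the gate pattern (the helper itself is hypothetical):

package deployment

import "github.com/arangodb/kube-arangodb/pkg/deployment/features"

// versionCheckWanted is a hypothetical helper; the real gating happens inline
// in the pod builder (pkg/deployment/pod/upgrade_version_check.go below).
func versionCheckWanted() bool {
    return features.UpgradeVersionCheck().Enabled()
}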
63 pkg/deployment/pod/upgrade_version_check.go (new file)

@@ -0,0 +1,63 @@
//
// DISCLAIMER
//
// Copyright 2020 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Adam Janikowski
//

package pod

import (
    api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
    "github.com/arangodb/kube-arangodb/pkg/deployment/features"
    "github.com/arangodb/kube-arangodb/pkg/deployment/resources/inspector"
    "github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
    core "k8s.io/api/core/v1"
)

func UpgradeVersionCheck() Builder {
    return upgradeVersionCheck{}
}

type upgradeVersionCheck struct{}

func (u upgradeVersionCheck) Args(i Input) k8sutil.OptionPairs {
    if features.UpgradeVersionCheck().Enabled() {
        switch i.Group {
        case api.ServerGroupAgents, api.ServerGroupDBServers, api.ServerGroupSingle:
            return k8sutil.NewOptionPair(k8sutil.OptionPair{
                Key:   "--database.check-version",
                Value: "true",
            })
        }
    }

    return nil
}

func (u upgradeVersionCheck) Volumes(i Input) ([]core.Volume, []core.VolumeMount) {
    return nil, nil
}

func (u upgradeVersionCheck) Envs(i Input) []core.EnvVar {
    return nil
}

func (u upgradeVersionCheck) Verify(i Input, cachedStatus inspector.Inspector) error {
    return nil
}
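For context (not part of the diff): arangod's --database.check-version makes the server validate the data-directory version and exit immediately, which is what makes it usable as an init-container probe. A sketch of the builder's behavior, assuming the feature gate is on and that only Input.Group matters here, as in the switch above:

package main

import (
    "fmt"

    api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
    "github.com/arangodb/kube-arangodb/pkg/deployment/pod"
)

func main() {
    dbserver := pod.UpgradeVersionCheck().Args(pod.Input{Group: api.ServerGroupDBServers})
    coordinator := pod.UpgradeVersionCheck().Args(pod.Input{Group: api.ServerGroupCoordinators})

    // Data-bearing groups get the extra option; coordinators keep none,
    // since they have no database directory whose version could mismatch.
    fmt.Println(len(dbserver), len(coordinator)) // 1 0 (with the feature enabled)
}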
@@ -80,6 +80,9 @@ func (a *setCurrentMemberImageAction) CheckProgress(ctx context.Context) (bool,
         return false
     }
 
+    if !m.Image.Equal(&imageInfo) {
+        m.OldImage = m.Image.DeepCopy()
+    }
     m.Image = &imageInfo
 
     if err := s.Members.Update(m, g); err != nil {
@@ -110,7 +110,32 @@ func (a *actionUpgradeMember) CheckProgress(ctx context.Context) (bool, bool, er
         log.Error().Msg("No such member")
         return true, false, nil
     }
 
     isUpgrading := m.Phase == api.MemberPhaseUpgrading
+
+    if isUpgrading {
+        if m.Conditions.IsTrue(api.ConditionTypeTerminated) {
+            if m.Conditions.IsTrue(api.ConditionTypeUpgradeFailed) {
+                a.log.Error().Msgf("Upgrade of member failed")
+            }
+            // Invalidate plan
+            m.Phase = ""
+            m.Conditions.Remove(api.ConditionTypeTerminated)
+            m.Conditions.Remove(api.ConditionTypeUpgradeFailed)
+
+            if m.OldImage != nil {
+                m.Image = m.OldImage.DeepCopy()
+            }
+
+            if err := a.actionCtx.UpdateMember(m); err != nil {
+                return false, true, nil
+            }
+
+            log.Error().Msgf("Upgrade failed")
+            return false, true, nil
+        }
+    }
 
     log = log.With().
         Str("pod-name", m.PodName).
         Bool("is-upgrading", isUpgrading).Logger()
@@ -128,6 +153,9 @@ func (a *actionUpgradeMember) CheckProgress(ctx context.Context) (bool, bool, er
     m.Phase = api.MemberPhaseCreated
     m.RecentTerminations = nil // Since we're upgrading, we do not care about old terminations.
     m.CleanoutJobID = ""
+    if !m.OldImage.Equal(m.Image) && isUpgrading {
+        m.OldImage = m.Image.DeepCopy()
+    }
     if err := a.actionCtx.UpdateMember(m); err != nil {
         return false, false, errors.WithStack(err)
     }
@@ -24,6 +24,7 @@ package reconcile
 
 import (
     "context"
+    "time"
 
     "github.com/arangodb/kube-arangodb/pkg/util/errors"
 
@@ -139,7 +140,10 @@ func (a *actionWaitForMemberUp) checkProgressAgent(ctx context.Context) (bool, b
         a.Endpoints()
     }
 
-    if err := agency.AreAgentsHealthy(ctx, clients); err != nil {
+    shortCtx, c := context.WithTimeout(ctx, 3*time.Second)
+    defer c()
+
+    if err := agency.AreAgentsHealthy(shortCtx, clients); err != nil {
         log.Debug().Err(err).Msg("Not all agents are ready")
         return false, false, nil
     }
@@ -113,7 +113,6 @@ func createPlan(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIObject,
     currentPlan api.Plan, spec api.DeploymentSpec,
     status api.DeploymentStatus, cachedStatus inspector.Inspector,
     builderCtx PlanBuilderContext) (api.Plan, bool) {
-
     if !currentPlan.IsEmpty() {
         // Plan already exists, complete that first
         return currentPlan, false
@@ -313,6 +312,11 @@ type planBuilder func(ctx context.Context,
     spec api.DeploymentSpec, status api.DeploymentStatus,
     cachedStatus inspector.Inspector, context PlanBuilderContext) api.Plan
 
+type planBuilderCondition func(ctx context.Context,
+    log zerolog.Logger, apiObject k8sutil.APIObject,
+    spec api.DeploymentSpec, status api.DeploymentStatus,
+    cachedStatus inspector.Inspector, context PlanBuilderContext) bool
+
 type planBuilderSubPlan func(ctx context.Context,
     log zerolog.Logger, apiObject k8sutil.APIObject,
     spec api.DeploymentSpec, status api.DeploymentStatus,
@@ -335,6 +339,7 @@ func NewWithPlanBuilder(ctx context.Context,
 
 type WithPlanBuilder interface {
     Apply(p planBuilder) api.Plan
+    ApplyWithCondition(c planBuilderCondition, p planBuilder) api.Plan
     ApplySubPlan(p planBuilderSubPlan, plans ...planBuilder) api.Plan
 }
 
@@ -348,6 +353,14 @@ type withPlanBuilder struct {
     context PlanBuilderContext
 }
 
+func (w withPlanBuilder) ApplyWithCondition(c planBuilderCondition, p planBuilder) api.Plan {
+    if !c(w.ctx, w.log, w.apiObject, w.spec, w.status, w.cachedStatus, w.context) {
+        return api.Plan{}
+    }
+
+    return w.Apply(p)
+}
+
 func (w withPlanBuilder) ApplySubPlan(p planBuilderSubPlan, plans ...planBuilder) api.Plan {
     return p(w.ctx, w.log, w.apiObject, w.spec, w.status, w.cachedStatus, w.context, w, plans...)
 }
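A hedged sketch of the new conditional application at a call site; the condition closure is hypothetical, and NewWithPlanBuilder is assumed to take the same tuple the builders receive:

// Hypothetical: evaluate the keyfile-renewal builder only when TLS is enabled.
tlsEnabled := func(ctx context.Context, log zerolog.Logger, apiObject k8sutil.APIObject,
    spec api.DeploymentSpec, status api.DeploymentStatus,
    cachedStatus inspector.Inspector, planCtx PlanBuilderContext) bool {
    return spec.TLS.IsSecure()
}

plan := NewWithPlanBuilder(ctx, log, apiObject, spec, status, cachedStatus, builderCtx).
    ApplyWithCondition(tlsEnabled, createKeyfileRenewalPlan)

When the condition reports false, ApplyWithCondition short-circuits to an empty api.Plan and the builder never runs, keeping cheap predicates out of the more expensive plan builders.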
@@ -26,6 +26,7 @@ import (
     "context"
     "fmt"
     "sort"
+    "time"
 
     "github.com/arangodb/kube-arangodb/pkg/util/errors"
 
@@ -220,12 +221,16 @@ func createJWTStatusUpdateRequired(ctx context.Context,
 func areJWTTokensUpToDate(ctx context.Context,
     log zerolog.Logger, apiObject k8sutil.APIObject,
     spec api.DeploymentSpec, status api.DeploymentStatus,
-    cachedStatus inspector.Inspector, context PlanBuilderContext,
+    cachedStatus inspector.Inspector, planCtx PlanBuilderContext,
     folder *core.Secret) (plan api.Plan, failed bool) {
+    gCtx, c := context.WithTimeout(ctx, 2*time.Second)
+    defer c()
+
     status.Members.ForeachServerGroup(func(group api.ServerGroup, list api.MemberStatusList) error {
         for _, m := range list {
-            if updateRequired, failedMember := isJWTTokenUpToDate(ctx, log, apiObject, spec, status, cachedStatus, context, group, m, folder); failedMember {
+            nCtx, c := context.WithTimeout(gCtx, 500*time.Millisecond)
+            defer c()
+
+            if updateRequired, failedMember := isJWTTokenUpToDate(nCtx, log, apiObject, spec, status, cachedStatus, planCtx, group, m, folder); failedMember {
                 failed = true
                 continue
             } else if updateRequired {
@@ -290,7 +290,7 @@ func createCACleanPlan(ctx context.Context,
 func createKeyfileRenewalPlanDefault(ctx context.Context,
     log zerolog.Logger, apiObject k8sutil.APIObject,
     spec api.DeploymentSpec, status api.DeploymentStatus,
-    cachedStatus inspector.Inspector, context PlanBuilderContext) api.Plan {
+    cachedStatus inspector.Inspector, planCtx PlanBuilderContext) api.Plan {
     if !spec.TLS.IsSecure() {
         return nil
     }
@@ -306,7 +306,11 @@ func createKeyfileRenewalPlanDefault(ctx context.Context,
         if !plan.IsEmpty() {
             return nil
         }
-        if renew, recreate := keyfileRenewalRequired(ctx, log, apiObject, spec, status, cachedStatus, context, group, member, api.TLSRotateModeRecreate); renew {
+
+        lCtx, c := context.WithTimeout(ctx, 500*time.Millisecond)
+        defer c()
+
+        if renew, recreate := keyfileRenewalRequired(lCtx, log, apiObject, spec, status, cachedStatus, planCtx, group, member, api.TLSRotateModeRecreate); renew {
             log.Info().Msg("Renewal of keyfile required - Recreate")
             if recreate {
                 plan = append(plan, api.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, group, member.ID, "Remove server keyfile and enforce renewal"))
@@ -324,7 +328,7 @@
 func createKeyfileRenewalPlanInPlace(ctx context.Context,
     log zerolog.Logger, apiObject k8sutil.APIObject,
     spec api.DeploymentSpec, status api.DeploymentStatus,
-    cachedStatus inspector.Inspector, context PlanBuilderContext) api.Plan {
+    cachedStatus inspector.Inspector, planCtx PlanBuilderContext) api.Plan {
     if !spec.TLS.IsSecure() {
         return nil
     }
@@ -337,7 +341,10 @@ func createKeyfileRenewalPlanInPlace(ctx context.Context,
     }
 
     for _, member := range members {
-        if renew, recreate := keyfileRenewalRequired(ctx, log, apiObject, spec, status, cachedStatus, context, group, member, api.TLSRotateModeInPlace); renew {
+        lCtx, c := context.WithTimeout(ctx, 500*time.Millisecond)
+        defer c()
+
+        if renew, recreate := keyfileRenewalRequired(lCtx, log, apiObject, spec, status, cachedStatus, planCtx, group, member, api.TLSRotateModeInPlace); renew {
             log.Info().Msg("Renewal of keyfile required - InPlace")
             if recreate {
                 plan = append(plan, api.NewAction(api.ActionTypeCleanTLSKeyfileCertificate, group, member.ID, "Remove server keyfile and enforce renewal"))
@@ -355,16 +362,19 @@
 func createKeyfileRenewalPlan(ctx context.Context,
     log zerolog.Logger, apiObject k8sutil.APIObject,
     spec api.DeploymentSpec, status api.DeploymentStatus,
-    cachedStatus inspector.Inspector, context PlanBuilderContext) api.Plan {
+    cachedStatus inspector.Inspector, planCtx PlanBuilderContext) api.Plan {
     if !spec.TLS.IsSecure() {
         return nil
     }
 
+    gCtx, c := context.WithTimeout(ctx, 2*time.Second)
+    defer c()
+
     switch createKeyfileRenewalPlanMode(spec, status) {
     case api.TLSRotateModeInPlace:
-        return createKeyfileRenewalPlanInPlace(ctx, log, apiObject, spec, status, cachedStatus, context)
+        return createKeyfileRenewalPlanInPlace(gCtx, log, apiObject, spec, status, cachedStatus, planCtx)
     default:
-        return createKeyfileRenewalPlanDefault(ctx, log, apiObject, spec, status, cachedStatus, context)
+        return createKeyfileRenewalPlanDefault(gCtx, log, apiObject, spec, status, cachedStatus, planCtx)
     }
 }
@@ -420,6 +430,8 @@ func checkServerValidCertRequest(ctx context.Context, context PlanBuilderContext
         return nil, err
     }
 
+    req = req.WithContext(ctx)
+
     if auth != nil && auth.Type() == driver.AuthenticationTypeRaw {
         if h := auth.Get("value"); h != "" {
             req.Header.Add("Authorization", h)
@@ -61,7 +61,7 @@ func versionHasAdvertisedEndpoint(v driver.Version) bool {
     return v.CompareTo("3.4.0") >= 0
 }
 
-// createArangodArgs creates command line arguments for an arangod server in the given group.
+// createArangodArgsWithUpgrade creates command line arguments for an arangod server upgrade in the given group.
 func createArangodArgsWithUpgrade(input pod.Input, additionalOptions ...k8sutil.OptionPair) []string {
     return createArangodArgs(input, pod.AutoUpgrade().Args(input)...)
 }
@@ -484,8 +484,8 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, memberID string,
         m.PodUID = uid
         m.PodSpecVersion = sha
         m.Endpoint = util.NewString(k8sutil.CreatePodDNSNameWithDomain(apiObject, spec.ClusterDomain, role, m.ID))
-        m.ArangoVersion = status.CurrentImage.ArangoDBVersion
-        m.ImageID = status.CurrentImage.ImageID
+        m.ArangoVersion = m.Image.ArangoDBVersion
+        m.ImageID = m.Image.ImageID
 
         // Check for missing side cars in
         m.SideCarSpecs = make(map[string]core.Container)
@@ -543,6 +543,8 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, memberID string,
     m.Conditions.Remove(api.ConditionTypeTerminating)
     m.Conditions.Remove(api.ConditionTypeAgentRecoveryNeeded)
     m.Conditions.Remove(api.ConditionTypeAutoUpgrade)
+    m.Conditions.Remove(api.ConditionTypeUpgradeFailed)
+    m.Upgrade = false
     if err := status.Members.Update(m, group); err != nil {
         return errors.WithStack(err)
     }
@@ -426,7 +426,7 @@ func (m *MemberArangoDPod) GetInitContainers() ([]core.Container, error) {
 
     {
         // Upgrade container - run in background
-        if m.autoUpgrade {
+        if m.autoUpgrade || m.status.Upgrade {
             args := createArangodArgsWithUpgrade(m.AsInput())
 
             c, err := k8sutil.NewContainer(args, m.GetContainerCreator())
@@ -443,6 +443,28 @@ func (m *MemberArangoDPod) GetInitContainers() ([]core.Container, error) {
 
             initContainers = append(initContainers, c)
         }
+
+        // VersionCheck Container
+        {
+            versionArgs := pod.UpgradeVersionCheck().Args(m.AsInput())
+            if len(versionArgs) > 0 {
+                args := createArangodArgs(m.AsInput(), versionArgs...)
+
+                c, err := k8sutil.NewContainer(args, m.GetContainerCreator())
+                if err != nil {
+                    return nil, err
+                }
+
+                _, c.VolumeMounts = m.GetVolumes()
+
+                c.Name = api.ServerGroupReservedInitContainerNameVersionCheck
+                c.Lifecycle = nil
+                c.LivenessProbe = nil
+                c.ReadinessProbe = nil
+
+                initContainers = append(initContainers, c)
+            }
+        }
     }
 
     return initContainers, nil
@@ -113,20 +113,64 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspector.Insp
         // Pod has terminated with at least 1 container with a non-zero exit code.
         wasTerminated := memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated)
         if memberStatus.Conditions.Update(api.ConditionTypeTerminated, true, "Pod Failed", "") {
-            if c, ok := k8sutil.GetContainerStatusByName(pod, k8sutil.ServerContainerName); ok {
-                if t := c.State.Terminated; t != nil {
-                    log.Warn().Str("member", memberStatus.ID).
-                        Str("pod", pod.GetName()).
-                        Str("uid", string(pod.GetUID())).
-                        Int32("exit-code", t.ExitCode).
-                        Str("reason", t.Reason).
-                        Str("message", t.Message).
-                        Int32("signal", t.Signal).
-                        Time("started", t.StartedAt.Time).
-                        Time("finished", t.FinishedAt.Time).
-                        Msgf("Pod failed in unexpected way")
+            if containers := k8sutil.GetFailedContainerNames(pod.Status.InitContainerStatuses); len(containers) > 0 {
+                for _, container := range containers {
+                    switch container {
+                    case api.ServerGroupReservedInitContainerNameVersionCheck:
+                        if c, ok := k8sutil.GetAnyContainerStatusByName(pod.Status.InitContainerStatuses, container); ok {
+                            if t := c.State.Terminated; t != nil && t.ExitCode == 11 {
+                                memberStatus.Upgrade = true
+                                updateMemberStatusNeeded = true
+                            }
+                        }
+                    case api.ServerGroupReservedInitContainerNameUpgrade:
+                        memberStatus.Conditions.Update(api.ConditionTypeUpgradeFailed, true, "Upgrade Failed", "")
+                    }
+
+                    if c, ok := k8sutil.GetAnyContainerStatusByName(pod.Status.InitContainerStatuses, container); ok {
+                        if t := c.State.Terminated; t != nil && t.ExitCode != 0 {
+                            log.Warn().Str("member", memberStatus.ID).
+                                Str("pod", pod.GetName()).
+                                Str("container", container).
+                                Str("uid", string(pod.GetUID())).
+                                Int32("exit-code", t.ExitCode).
+                                Str("reason", t.Reason).
+                                Str("message", t.Message).
+                                Int32("signal", t.Signal).
+                                Time("started", t.StartedAt.Time).
+                                Time("finished", t.FinishedAt.Time).
+                                Msgf("Pod failed in unexpected way: Init Container failed")
+                        }
+                    }
+                }
+            }
+
+            if containers := k8sutil.GetFailedContainerNames(pod.Status.ContainerStatuses); len(containers) > 0 {
+                for _, container := range containers {
+                    if c, ok := k8sutil.GetAnyContainerStatusByName(pod.Status.ContainerStatuses, container); ok {
+                        if t := c.State.Terminated; t != nil && t.ExitCode != 0 {
+                            log.Warn().Str("member", memberStatus.ID).
+                                Str("pod", pod.GetName()).
+                                Str("container", container).
+                                Str("uid", string(pod.GetUID())).
+                                Int32("exit-code", t.ExitCode).
+                                Str("reason", t.Reason).
+                                Str("message", t.Message).
+                                Int32("signal", t.Signal).
+                                Time("started", t.StartedAt.Time).
+                                Time("finished", t.FinishedAt.Time).
+                                Msgf("Pod failed in unexpected way: Core Container failed")
+                        }
+                    }
+                }
+            }
 
             log.Debug().Str("pod-name", pod.GetName()).Msg("Updating member condition Terminated to true: Pod Failed")
             updateMemberStatusNeeded = true
             nextInterval = nextInterval.ReduceTo(recheckSoonPodInspectorInterval)
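Context for the magic number (not stated in the diff itself): when the version-check init container exits with code 11, the operator records that the member's data files need the upgrade procedure by setting memberStatus.Upgrade, and the pod builder above then adds the upgrade init container on the next pod creation. The mapping, restated as a hypothetical standalone helper:

package main

import "fmt"

// classifyVersionCheckExit mirrors the handling above: 0 means the data
// directory matches the binary, 11 means the upgrade procedure is needed,
// anything else is an ordinary init-container failure. Hypothetical helper;
// the real logic lives inline in InspectPods.
func classifyVersionCheckExit(code int32) string {
    switch code {
    case 0:
        return "up to date"
    case 11:
        return "upgrade required"
    default:
        return "failed"
    }
}

func main() {
    for _, code := range []int32{0, 11, 1} {
        fmt.Printf("exit %d -> %s\n", code, classifyVersionCheckExit(code))
    }
}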
@@ -22,33 +22,57 @@
 
 package k8sutil
 
-import v1 "k8s.io/api/core/v1"
+import core "k8s.io/api/core/v1"
 
 // GetContainerByName returns the container in the given pod with the given name.
 // Returns false if not found.
-func GetContainerByName(p *v1.Pod, name string) (v1.Container, bool) {
+func GetContainerByName(p *core.Pod, name string) (core.Container, bool) {
     for _, c := range p.Spec.Containers {
         if c.Name == name {
             return c, true
         }
     }
-    return v1.Container{}, false
+    return core.Container{}, false
 }
 
 // GetContainerStatusByName returns the container status in the given pod with the given name.
 // Returns false if not found.
-func GetContainerStatusByName(p *v1.Pod, name string) (v1.ContainerStatus, bool) {
+func GetContainerStatusByName(p *core.Pod, name string) (core.ContainerStatus, bool) {
     for _, c := range p.Status.ContainerStatuses {
         if c.Name == name {
             return c, true
         }
     }
-    return v1.ContainerStatus{}, false
+    return core.ContainerStatus{}, false
 }
 
+// GetAnyContainerStatusByName returns the container status in the given ContainerStatus list with the given name.
+// Returns false if not found.
+func GetAnyContainerStatusByName(containers []core.ContainerStatus, name string) (core.ContainerStatus, bool) {
+    for _, c := range containers {
+        if c.Name == name {
+            return c, true
+        }
+    }
+    return core.ContainerStatus{}, false
+}
+
+// GetFailedContainerNames returns list of failed containers from provided list of statuses.
+func GetFailedContainerNames(containers []core.ContainerStatus) []string {
+    var failedContainers []string
+
+    for _, c := range containers {
+        if IsContainerFailed(&c) {
+            failedContainers = append(failedContainers, c.Name)
+        }
+    }
+
+    return failedContainers
+}
+
 // IsResourceRequirementsChanged returns true if the resource requirements have changed.
-func IsResourceRequirementsChanged(wanted, given v1.ResourceRequirements) bool {
-    checkList := func(wanted, given v1.ResourceList) bool {
+func IsResourceRequirementsChanged(wanted, given core.ResourceRequirements) bool {
+    checkList := func(wanted, given core.ResourceList) bool {
         for k, v := range wanted {
             if gv, ok := given[k]; !ok {
                 return true
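A minimal sketch of the two new helpers on a hand-built status list (illustrative values only):

package main

import (
    "fmt"

    "github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
    core "k8s.io/api/core/v1"
)

func main() {
    statuses := []core.ContainerStatus{
        {Name: "version-check", State: core.ContainerState{
            Terminated: &core.ContainerStateTerminated{ExitCode: 11},
        }},
        {Name: "uuid", State: core.ContainerState{
            Terminated: &core.ContainerStateTerminated{ExitCode: 0},
        }},
    }

    // Only terminations with a non-zero exit code count as failed.
    fmt.Println(k8sutil.GetFailedContainerNames(statuses)) // [version-check]

    if c, ok := k8sutil.GetAnyContainerStatusByName(statuses, "version-check"); ok {
        fmt.Println(c.State.Terminated.ExitCode) // 11
    }
}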
@@ -121,6 +121,18 @@ func IsPodFailed(pod *core.Pod) bool {
     }
 }
 
+// IsContainerFailed returns true if the given container
+// has terminated with a non-zero exit code.
+func IsContainerFailed(container *core.ContainerStatus) bool {
+    if c := container.State.Terminated; c != nil {
+        if c.ExitCode != 0 {
+            return true
+        }
+    }
+
+    return false
+}
+
 // IsPodScheduled returns true if the pod has been scheduled.
 func IsPodScheduled(pod *core.Pod) bool {
     condition := getPodCondition(&pod.Status, core.PodScheduled)