mirror of
https://github.com/arangodb/kube-arangodb.git
synced 2024-12-14 11:57:37 +00:00
[Bugfix] Add DistributeShardsLike support (#1037)
This commit is contained in:
parent
18ab40c559
commit
cde52bb084
32 changed files with 1695 additions and 101 deletions
|
@ -27,6 +27,7 @@
|
|||
- (Documentation) Refactor metrics (Part 1)
|
||||
- (Bugfix) Extend Agency HealthCheck for replace
|
||||
- (Bugfix) Allow to remove resources (CPU & Memory) on the managed pods
|
||||
- (Bugfix) Add DistributeShardsLike support
|
||||
|
||||
## [1.2.13](https://github.com/arangodb/kube-arangodb/tree/1.2.13) (2022-06-07)
|
||||
- (Bugfix) Fix arangosync members state inspection
|
||||
|
|
|
@ -2,15 +2,20 @@
|
|||
|
||||
## List
|
||||
|
||||
| Name | Namespace | Group | Type | Description |
|
||||
|:---------------------------------------------------------------------------------------------------------------:|:-----------------:|:------------:|:-----:|:---------------------------------------------------|
|
||||
| [arangodb_operator_agency_errors](./arangodb_operator_agency_errors.md) | arangodb_operator | agency | Count | Current count of agency cache fetch errors |
|
||||
| [arangodb_operator_agency_fetches](./arangodb_operator_agency_fetches.md) | arangodb_operator | agency | Count | Current count of agency cache fetches |
|
||||
| [arangodb_operator_agency_index](./arangodb_operator_agency_index.md) | arangodb_operator | agency | Gauge | Current index of the agency cache |
|
||||
| [arangodb_operator_agency_cache_health_present](./arangodb_operator_agency_cache_health_present.md) | arangodb_operator | agency_cache | Gauge | Determines if local agency cache health is present |
|
||||
| [arangodb_operator_agency_cache_healthy](./arangodb_operator_agency_cache_healthy.md) | arangodb_operator | agency_cache | Gauge | Determines if agency is healthy |
|
||||
| [arangodb_operator_agency_cache_leaders](./arangodb_operator_agency_cache_leaders.md) | arangodb_operator | agency_cache | Gauge | Determines agency leader vote count |
|
||||
| [arangodb_operator_agency_cache_member_commit_offset](./arangodb_operator_agency_cache_member_commit_offset.md) | arangodb_operator | agency_cache | Gauge | Determines agency member commit offset |
|
||||
| [arangodb_operator_agency_cache_member_serving](./arangodb_operator_agency_cache_member_serving.md) | arangodb_operator | agency_cache | Gauge | Determines if agency member is reachable |
|
||||
| [arangodb_operator_agency_cache_present](./arangodb_operator_agency_cache_present.md) | arangodb_operator | agency_cache | Gauge | Determines if local agency cache is present |
|
||||
| [arangodb_operator_agency_cache_serving](./arangodb_operator_agency_cache_serving.md) | arangodb_operator | agency_cache | Gauge | Determines if agency is serving |
|
||||
| Name | Namespace | Group | Type | Description |
|
||||
|:---------------------------------------------------------------------------------------------------------------:|:-----------------:|:------------:|:-------:|:---------------------------------------------------|
|
||||
| [arangodb_operator_agency_errors](./arangodb_operator_agency_errors.md) | arangodb_operator | agency | Counter | Current count of agency cache fetch errors |
|
||||
| [arangodb_operator_agency_fetches](./arangodb_operator_agency_fetches.md) | arangodb_operator | agency | Counter | Current count of agency cache fetches |
|
||||
| [arangodb_operator_agency_index](./arangodb_operator_agency_index.md) | arangodb_operator | agency | Gauge | Current index of the agency cache |
|
||||
| [arangodb_operator_agency_cache_health_present](./arangodb_operator_agency_cache_health_present.md) | arangodb_operator | agency_cache | Gauge | Determines if local agency cache health is present |
|
||||
| [arangodb_operator_agency_cache_healthy](./arangodb_operator_agency_cache_healthy.md) | arangodb_operator | agency_cache | Gauge | Determines if agency is healthy |
|
||||
| [arangodb_operator_agency_cache_leaders](./arangodb_operator_agency_cache_leaders.md) | arangodb_operator | agency_cache | Gauge | Determines agency leader vote count |
|
||||
| [arangodb_operator_agency_cache_member_commit_offset](./arangodb_operator_agency_cache_member_commit_offset.md) | arangodb_operator | agency_cache | Gauge | Determines agency member commit offset |
|
||||
| [arangodb_operator_agency_cache_member_serving](./arangodb_operator_agency_cache_member_serving.md) | arangodb_operator | agency_cache | Gauge | Determines if agency member is reachable |
|
||||
| [arangodb_operator_agency_cache_present](./arangodb_operator_agency_cache_present.md) | arangodb_operator | agency_cache | Gauge | Determines if local agency cache is present |
|
||||
| [arangodb_operator_agency_cache_serving](./arangodb_operator_agency_cache_serving.md) | arangodb_operator | agency_cache | Gauge | Determines if agency is serving |
|
||||
| [arangodb_operator_rebalancer_enabled](./arangodb_operator_rebalancer_enabled.md) | arangodb_operator | rebalancer | Gauge | Determines if rebalancer is enabled |
|
||||
| [arangodb_operator_rebalancer_moves_current](./arangodb_operator_rebalancer_moves_current.md) | arangodb_operator | rebalancer | Gauge | Define how many moves are currently in progress |
|
||||
| [arangodb_operator_rebalancer_moves_failed](./arangodb_operator_rebalancer_moves_failed.md) | arangodb_operator | rebalancer | Counter | Define how many moves failed |
|
||||
| [arangodb_operator_rebalancer_moves_generated](./arangodb_operator_rebalancer_moves_generated.md) | arangodb_operator | rebalancer | Counter | Define how many moves were generated |
|
||||
| [arangodb_operator_rebalancer_moves_succeeded](./arangodb_operator_rebalancer_moves_succeeded.md) | arangodb_operator | rebalancer | Counter | Define how many moves succeeded |
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# arangodb_operator_agency_errors (Count)
|
||||
# arangodb_operator_agency_errors (Counter)
|
||||
|
||||
## Description
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# arangodb_operator_agency_fetches (Count)
|
||||
# arangodb_operator_agency_fetches (Counter)
|
||||
|
||||
## Description
|
||||
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
# arangodb_operator_rebalancer_enabled (Gauge)
|
||||
|
||||
## Description
|
||||
|
||||
Determines if rebalancer is enabled
|
||||
|
||||
## Labels
|
||||
|
||||
| Label | Description |
|
||||
|:---------:|:---------------------|
|
||||
| namespace | Deployment Namespace |
|
||||
| name | Deployment Name |
|
|
@ -0,0 +1,12 @@
|
|||
# arangodb_operator_rebalancer_moves_current (Gauge)
|
||||
|
||||
## Description
|
||||
|
||||
Define how many moves are currently in progress
|
||||
|
||||
## Labels
|
||||
|
||||
| Label | Description |
|
||||
|:---------:|:---------------------|
|
||||
| namespace | Deployment Namespace |
|
||||
| name | Deployment Name |
|
|
@ -0,0 +1,12 @@
|
|||
# arangodb_operator_rebalancer_moves_failed (Counter)
|
||||
|
||||
## Description
|
||||
|
||||
Define how many moves failed
|
||||
|
||||
## Labels
|
||||
|
||||
| Label | Description |
|
||||
|:---------:|:---------------------|
|
||||
| namespace | Deployment Namespace |
|
||||
| name | Deployment Name |
|
|
@ -0,0 +1,12 @@
|
|||
# arangodb_operator_rebalancer_moves_generated (Counter)
|
||||
|
||||
## Description
|
||||
|
||||
Define how many moves were generated
|
||||
|
||||
## Labels
|
||||
|
||||
| Label | Description |
|
||||
|:---------:|:---------------------|
|
||||
| namespace | Deployment Namespace |
|
||||
| name | Deployment Name |
|
|
@ -0,0 +1,12 @@
|
|||
# arangodb_operator_rebalancer_moves_succeeded (Counter)
|
||||
|
||||
## Description
|
||||
|
||||
Define how many moves succeeded
|
||||
|
||||
## Labels
|
||||
|
||||
| Label | Description |
|
||||
|:---------:|:---------------------|
|
||||
| namespace | Deployment Namespace |
|
||||
| name | Deployment Name |
|
1
go.mod
1
go.mod
|
@ -28,7 +28,6 @@ require (
|
|||
github.com/arangodb/go-driver v1.2.1
|
||||
github.com/arangodb/go-driver/v2 v2.0.0-20211021031401-d92dcd5a4c83
|
||||
github.com/arangodb/go-upgrade-rules v0.0.0-20180809110947-031b4774ff21
|
||||
github.com/arangodb/rebalancer v0.1.1
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible
|
||||
github.com/dchest/uniuri v0.0.0-20160212164326-8902c56451e9
|
||||
github.com/gin-gonic/gin v1.7.2
|
||||
|
|
|
@ -88,7 +88,7 @@ namespaces:
|
|||
fetches:
|
||||
shortDescription: "Current count of agency cache fetches"
|
||||
description: "Current count of agency cache fetches"
|
||||
type: "Count"
|
||||
type: "Counter"
|
||||
labels:
|
||||
- key: namespace
|
||||
description: "Deployment Namespace"
|
||||
|
@ -97,7 +97,53 @@ namespaces:
|
|||
errors:
|
||||
shortDescription: "Current count of agency cache fetch errors"
|
||||
description: "Current count of agency cache fetch errors"
|
||||
type: "Count"
|
||||
type: "Counter"
|
||||
labels:
|
||||
- key: namespace
|
||||
description: "Deployment Namespace"
|
||||
- key: name
|
||||
description: "Deployment Name"
|
||||
rebalancer:
|
||||
enabled:
|
||||
shortDescription: "Determines if rebalancer is enabled"
|
||||
description: "Determines if rebalancer is enabled"
|
||||
type: "Gauge"
|
||||
labels:
|
||||
- key: namespace
|
||||
description: "Deployment Namespace"
|
||||
- key: name
|
||||
description: "Deployment Name"
|
||||
moves_generated:
|
||||
shortDescription: "Define how many moves were generated"
|
||||
description: "Define how many moves were generated"
|
||||
type: "Counter"
|
||||
labels:
|
||||
- key: namespace
|
||||
description: "Deployment Namespace"
|
||||
- key: name
|
||||
description: "Deployment Name"
|
||||
moves_succeeded:
|
||||
shortDescription: "Define how many moves succeeded"
|
||||
description: "Define how many moves succeeded"
|
||||
type: "Counter"
|
||||
labels:
|
||||
- key: namespace
|
||||
description: "Deployment Namespace"
|
||||
- key: name
|
||||
description: "Deployment Name"
|
||||
moves_failed:
|
||||
shortDescription: "Define how many moves failed"
|
||||
description: "Define how many moves failed"
|
||||
type: "Counter"
|
||||
labels:
|
||||
- key: namespace
|
||||
description: "Deployment Namespace"
|
||||
- key: name
|
||||
description: "Deployment Name"
|
||||
moves_current:
|
||||
shortDescription: "Define how many moves are currently in progress"
|
||||
description: "Define how many moves are currently in progress"
|
||||
type: "Gauge"
|
||||
labels:
|
||||
- key: namespace
|
||||
description: "Deployment Namespace"
|
||||
|
|
|
@ -192,6 +192,7 @@ const (
|
|||
// Rebalancer
|
||||
ActionTypeRebalancerGenerate ActionType = "RebalancerGenerate"
|
||||
ActionTypeRebalancerCheck ActionType = "RebalancerCheck"
|
||||
ActionTypeRebalancerClean ActionType = "RebalancerClean"
|
||||
|
||||
// Resources
|
||||
ActionTypeResourceSync ActionType = "ResourceSync"
|
||||
|
|
|
@ -192,6 +192,7 @@ const (
|
|||
// Rebalancer
|
||||
ActionTypeRebalancerGenerate ActionType = "RebalancerGenerate"
|
||||
ActionTypeRebalancerCheck ActionType = "RebalancerCheck"
|
||||
ActionTypeRebalancerClean ActionType = "RebalancerClean"
|
||||
|
||||
// Resources
|
||||
ActionTypeResourceSync ActionType = "ResourceSync"
|
||||
|
|
|
@ -333,7 +333,9 @@ func getLeader(ctx context.Context, size int, clients map[string]agency.Agency)
|
|||
for id := range names {
|
||||
if h.leaderID == h.names[id] {
|
||||
h.leader = clients[names[id]].Connection()
|
||||
return clients[names[id]], configs[id], h, nil
|
||||
if cfg := configs[id]; cfg != nil {
|
||||
return clients[names[id]], cfg, h, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,55 +21,20 @@
|
|||
package agency
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func Test_Config_Unmarshal(t *testing.T) {
|
||||
data := `{
|
||||
"term": 0,
|
||||
"leaderId": "AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82",
|
||||
"commitIndex": 94,
|
||||
"lastCompactionAt": 0,
|
||||
"nextCompactionAfter": 500,
|
||||
"lastAcked": {
|
||||
"AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82": {
|
||||
"lastAckedTime": 0,
|
||||
"lastAckedIndex": 94
|
||||
}
|
||||
},
|
||||
"configuration": {
|
||||
"pool": {
|
||||
"AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82": "tcp://[::1]:4001"
|
||||
},
|
||||
"active": [
|
||||
"AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82"
|
||||
],
|
||||
"id": "AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82",
|
||||
"agency size": 1,
|
||||
"pool size": 1,
|
||||
"endpoint": "tcp://[::1]:4001",
|
||||
"min ping": 1,
|
||||
"max ping": 5,
|
||||
"timeoutMult": 1,
|
||||
"supervision": true,
|
||||
"supervision frequency": 1,
|
||||
"compaction step size": 500,
|
||||
"compaction keep size": 50000,
|
||||
"supervision grace period": 10,
|
||||
"supervision ok threshold": 5,
|
||||
"version": 2,
|
||||
"startup": "origin"
|
||||
},
|
||||
"engine": "rocksdb",
|
||||
"version": "3.10.0-devel"
|
||||
}`
|
||||
//go:embed testdata/config.json
|
||||
var config []byte
|
||||
|
||||
func Test_Config_Unmarshal(t *testing.T) {
|
||||
var cfg Config
|
||||
|
||||
require.NoError(t, json.Unmarshal([]byte(data), &cfg))
|
||||
require.NoError(t, json.Unmarshal(config, &cfg))
|
||||
|
||||
require.Equal(t, "AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82", cfg.LeaderId)
|
||||
require.Equal(t, uint64(94), cfg.CommitIndex)
|
||||
|
|
|
@ -60,6 +60,7 @@ type StatePlanCollection struct {
|
|||
MinReplicationFactor *int `json:"minReplicationFactor,omitempty"`
|
||||
WriteConcern *int `json:"writeConcern,omitempty"`
|
||||
ReplicationFactor *ReplicationFactor `json:"replicationFactor,omitempty"`
|
||||
DistributeShardsLike *string `json:"distributeShardsLike,omitempty"`
|
||||
}
|
||||
|
||||
func (a *StatePlanCollection) GetReplicationFactor(shard string) ReplicationFactor {
|
||||
|
|
|
@ -34,6 +34,10 @@ const (
|
|||
|
||||
type ReplicationFactor int
|
||||
|
||||
func (r *ReplicationFactor) IsNil() bool {
|
||||
return r == nil
|
||||
}
|
||||
|
||||
func (r *ReplicationFactor) IsUnknown() bool {
|
||||
if r == nil {
|
||||
return false
|
||||
|
|
File diff suppressed because one or more lines are too long
38
pkg/deployment/agency/testdata/config.json
vendored
Normal file
38
pkg/deployment/agency/testdata/config.json
vendored
Normal file
|
@ -0,0 +1,38 @@
|
|||
{
|
||||
"term": 0,
|
||||
"leaderId": "AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82",
|
||||
"commitIndex": 94,
|
||||
"lastCompactionAt": 0,
|
||||
"nextCompactionAfter": 500,
|
||||
"lastAcked": {
|
||||
"AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82": {
|
||||
"lastAckedTime": 0,
|
||||
"lastAckedIndex": 94
|
||||
}
|
||||
},
|
||||
"configuration": {
|
||||
"pool": {
|
||||
"AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82": "tcp://[::1]:4001"
|
||||
},
|
||||
"active": [
|
||||
"AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82"
|
||||
],
|
||||
"id": "AGNT-fd0f4fc7-b60b-44bb-9f5e-5fc91f708f82",
|
||||
"agency size": 1,
|
||||
"pool size": 1,
|
||||
"endpoint": "tcp://[::1]:4001",
|
||||
"min ping": 1,
|
||||
"max ping": 5,
|
||||
"timeoutMult": 1,
|
||||
"supervision": true,
|
||||
"supervision frequency": 1,
|
||||
"compaction step size": 500,
|
||||
"compaction keep size": 50000,
|
||||
"supervision grace period": 10,
|
||||
"supervision ok threshold": 5,
|
||||
"version": 2,
|
||||
"startup": "origin"
|
||||
},
|
||||
"engine": "rocksdb",
|
||||
"version": "3.10.0-devel"
|
||||
}
|
1120
pkg/deployment/agency/testdata/longdata.json
vendored
Normal file
1120
pkg/deployment/agency/testdata/longdata.json
vendored
Normal file
File diff suppressed because it is too large
Load diff
|
@ -157,8 +157,8 @@ func (i *inventory) Add(d *Deployment) {
|
|||
}
|
||||
|
||||
func (d *Deployment) CollectMetrics(m metrics.PushMetric) {
|
||||
m.Push(metric_descriptions.ArangodbOperatorAgencyErrorsCount(float64(d.metrics.agency.errors), d.namespace, d.name))
|
||||
m.Push(metric_descriptions.ArangodbOperatorAgencyFetchesCount(float64(d.metrics.agency.fetches), d.namespace, d.name))
|
||||
m.Push(metric_descriptions.ArangodbOperatorAgencyErrorsCounter(float64(d.metrics.agency.errors), d.namespace, d.name))
|
||||
m.Push(metric_descriptions.ArangodbOperatorAgencyFetchesCounter(float64(d.metrics.agency.fetches), d.namespace, d.name))
|
||||
m.Push(metric_descriptions.ArangodbOperatorAgencyIndexGauge(float64(d.metrics.agency.index), d.namespace, d.name))
|
||||
|
||||
if c := d.agencyCache; c != nil {
|
||||
|
@ -173,4 +173,8 @@ func (d *Deployment) CollectMetrics(m metrics.PushMetric) {
|
|||
} else {
|
||||
m.Push(metric_descriptions.ArangodbOperatorAgencyCachePresentGauge(0, d.namespace, d.name))
|
||||
}
|
||||
|
||||
if c := d.reconciler; c != nil {
|
||||
c.CollectMetrics(m)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,6 +56,8 @@ type ActionContext interface {
|
|||
|
||||
sutil.ACSGetter
|
||||
|
||||
Metrics() *Metrics
|
||||
|
||||
ActionLocalsContext
|
||||
|
||||
// GetMemberStatusByID returns the current member status
|
||||
|
@ -106,10 +108,11 @@ type ActionLocalsContext interface {
|
|||
}
|
||||
|
||||
// newActionContext creates a new ActionContext implementation.
|
||||
func newActionContext(log logging.Logger, context Context) ActionContext {
|
||||
func newActionContext(log logging.Logger, context Context, metrics *Metrics) ActionContext {
|
||||
return &actionContext{
|
||||
log: log,
|
||||
context: context,
|
||||
metrics: metrics,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -119,6 +122,11 @@ type actionContext struct {
|
|||
log logging.Logger
|
||||
cachedStatus inspectorInterface.Inspector
|
||||
locals api.PlanLocals
|
||||
metrics *Metrics
|
||||
}
|
||||
|
||||
func (ac *actionContext) Metrics() *Metrics {
|
||||
return ac.metrics
|
||||
}
|
||||
|
||||
func (ac *actionContext) ACS() sutil.ACS {
|
||||
|
|
93
pkg/deployment/reconcile/metrics.go
Normal file
93
pkg/deployment/reconcile/metrics.go
Normal file
|
@ -0,0 +1,93 @@
|
|||
//
|
||||
// DISCLAIMER
|
||||
//
|
||||
// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
|
||||
package reconcile
|
||||
|
||||
import (
|
||||
"github.com/arangodb/kube-arangodb/pkg/generated/metric_descriptions"
|
||||
"github.com/arangodb/kube-arangodb/pkg/util/metrics"
|
||||
)
|
||||
|
||||
type Metrics struct {
|
||||
Rebalancer MetricsRebalancer
|
||||
}
|
||||
|
||||
func (m *Metrics) GetRebalancer() *MetricsRebalancer {
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &m.Rebalancer
|
||||
}
|
||||
|
||||
type MetricsRebalancer struct {
|
||||
enabled bool
|
||||
moves int
|
||||
current int
|
||||
|
||||
succeeded, failed int
|
||||
}
|
||||
|
||||
func (m *MetricsRebalancer) SetEnabled(enabled bool) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.enabled = enabled
|
||||
}
|
||||
|
||||
func (m *MetricsRebalancer) AddMoves(moves int) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.moves += moves
|
||||
}
|
||||
|
||||
func (m *MetricsRebalancer) SetCurrent(current int) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.current = current
|
||||
}
|
||||
|
||||
func (m *MetricsRebalancer) AddFailures(i int) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.failed += i
|
||||
}
|
||||
|
||||
func (m *MetricsRebalancer) AddSuccesses(i int) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
m.succeeded += i
|
||||
}
|
||||
|
||||
func (r *Reconciler) CollectMetrics(m metrics.PushMetric) {
|
||||
if r.metrics.Rebalancer.enabled {
|
||||
m.Push(metric_descriptions.ArangodbOperatorRebalancerEnabledGauge(1, r.namespace, r.name))
|
||||
m.Push(metric_descriptions.ArangodbOperatorRebalancerMovesCurrentGauge(float64(r.metrics.Rebalancer.current), r.namespace, r.name))
|
||||
m.Push(metric_descriptions.ArangodbOperatorRebalancerMovesGeneratedCounter(float64(r.metrics.Rebalancer.moves), r.namespace, r.name))
|
||||
m.Push(metric_descriptions.ArangodbOperatorRebalancerMovesSucceededCounter(float64(r.metrics.Rebalancer.succeeded), r.namespace, r.name))
|
||||
m.Push(metric_descriptions.ArangodbOperatorRebalancerMovesFailedCounter(float64(r.metrics.Rebalancer.failed), r.namespace, r.name))
|
||||
} else {
|
||||
m.Push(metric_descriptions.ArangodbOperatorRebalancerEnabledGauge(0, r.namespace, r.name))
|
||||
}
|
||||
}
|
|
@ -116,40 +116,76 @@ func (p plannerResources) Set(deployment *api.DeploymentStatus, plan api.Plan) b
|
|||
// Returns true when it has to be called again soon.
|
||||
// False otherwise.
|
||||
func (d *Reconciler) ExecutePlan(ctx context.Context) (bool, error) {
|
||||
var callAgain bool
|
||||
execution := 0
|
||||
|
||||
if again, err := d.executePlanStatus(ctx, plannerHigh{}); err != nil {
|
||||
return false, errors.WithStack(err)
|
||||
} else if again {
|
||||
callAgain = true
|
||||
var retrySoon bool
|
||||
|
||||
for {
|
||||
if execution >= 32 {
|
||||
return retrySoon, nil
|
||||
}
|
||||
|
||||
execution++
|
||||
|
||||
if retrySoonCall, recall, err := d.executePlanInLoop(ctx); err != nil {
|
||||
return false, err
|
||||
} else {
|
||||
retrySoon = retrySoon || retrySoonCall
|
||||
if recall {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
if again, err := d.executePlanStatus(ctx, plannerResources{}); err != nil {
|
||||
d.planLogger.Err(err).Error("Execution of plan failed")
|
||||
return false, nil
|
||||
} else if again {
|
||||
callAgain = true
|
||||
}
|
||||
|
||||
if again, err := d.executePlanStatus(ctx, plannerNormal{}); err != nil {
|
||||
return false, errors.WithStack(err)
|
||||
} else if again {
|
||||
callAgain = true
|
||||
}
|
||||
|
||||
return callAgain, nil
|
||||
return retrySoon, nil
|
||||
}
|
||||
|
||||
func (d *Reconciler) executePlanStatus(ctx context.Context, pg planner) (bool, error) {
|
||||
// ExecutePlan tries to execute the plan as far as possible.
|
||||
// Returns true when it has to be called again soon.
|
||||
// False otherwise.
|
||||
func (d *Reconciler) executePlanInLoop(ctx context.Context) (bool, bool, error) {
|
||||
var callAgain bool
|
||||
var callInLoop bool
|
||||
|
||||
if again, inLoop, err := d.executePlanStatus(ctx, plannerHigh{}); err != nil {
|
||||
d.planLogger.Err(err).Error("Execution of plan failed")
|
||||
return false, false, errors.WithStack(err)
|
||||
} else {
|
||||
callAgain = callAgain || again
|
||||
callInLoop = callInLoop || inLoop
|
||||
}
|
||||
|
||||
if again, inLoop, err := d.executePlanStatus(ctx, plannerResources{}); err != nil {
|
||||
d.planLogger.Err(err).Error("Execution of plan failed")
|
||||
return false, false, nil
|
||||
} else {
|
||||
callAgain = callAgain || again
|
||||
callInLoop = callInLoop || inLoop
|
||||
}
|
||||
|
||||
if again, inLoop, err := d.executePlanStatus(ctx, plannerNormal{}); err != nil {
|
||||
d.planLogger.Err(err).Error("Execution of plan failed")
|
||||
return false, false, errors.WithStack(err)
|
||||
} else {
|
||||
callAgain = callAgain || again
|
||||
callInLoop = callInLoop || inLoop
|
||||
}
|
||||
|
||||
return callAgain, callInLoop, nil
|
||||
}
|
||||
|
||||
func (d *Reconciler) executePlanStatus(ctx context.Context, pg planner) (bool, bool, error) {
|
||||
loopStatus, _ := d.context.GetStatus()
|
||||
|
||||
plan := pg.Get(&loopStatus)
|
||||
|
||||
if len(plan) == 0 {
|
||||
return false, nil
|
||||
return false, false, nil
|
||||
}
|
||||
|
||||
newPlan, callAgain, err := d.executePlan(ctx, plan, pg)
|
||||
newPlan, callAgain, callInLoop, err := d.executePlan(ctx, plan, pg)
|
||||
|
||||
// Refresh current status
|
||||
loopStatus, lastVersion := d.context.GetStatus()
|
||||
|
@ -158,23 +194,23 @@ func (d *Reconciler) executePlanStatus(ctx context.Context, pg planner) (bool, e
|
|||
d.planLogger.Info("Updating plan")
|
||||
if err := d.context.UpdateStatus(ctx, loopStatus, lastVersion, true); err != nil {
|
||||
d.planLogger.Err(err).Debug("Failed to update CR status")
|
||||
return false, errors.WithStack(err)
|
||||
return false, false, errors.WithStack(err)
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return false, err
|
||||
return false, false, err
|
||||
}
|
||||
|
||||
return callAgain, nil
|
||||
return callAgain, callInLoop, nil
|
||||
}
|
||||
|
||||
func (d *Reconciler) executePlan(ctx context.Context, statusPlan api.Plan, pg planner) (newPlan api.Plan, callAgain bool, err error) {
|
||||
func (d *Reconciler) executePlan(ctx context.Context, statusPlan api.Plan, pg planner) (newPlan api.Plan, callAgain, callInLoop bool, err error) {
|
||||
plan := statusPlan.DeepCopy()
|
||||
|
||||
for {
|
||||
if len(plan) == 0 {
|
||||
return nil, false, nil
|
||||
return nil, false, false, nil
|
||||
}
|
||||
|
||||
// Take first action
|
||||
|
@ -185,14 +221,14 @@ func (d *Reconciler) executePlan(ctx context.Context, statusPlan api.Plan, pg pl
|
|||
done, abort, recall, retry, err := d.executeAction(ctx, planAction, action)
|
||||
if err != nil {
|
||||
if retry {
|
||||
return plan, true, nil
|
||||
return plan, true, false, nil
|
||||
}
|
||||
// The Plan will be cleaned up, so no actions will be in the queue.
|
||||
actionsCurrentPlan.WithLabelValues(d.context.GetName(), planAction.Group.AsRole(), planAction.MemberID,
|
||||
planAction.Type.String(), pg.Type()).Set(0.0)
|
||||
|
||||
actionsFailedMetrics.WithLabelValues(d.context.GetName(), planAction.Type.String(), pg.Type()).Inc()
|
||||
return nil, false, errors.WithStack(err)
|
||||
return nil, false, false, errors.WithStack(err)
|
||||
}
|
||||
|
||||
if abort {
|
||||
|
@ -201,7 +237,7 @@ func (d *Reconciler) executePlan(ctx context.Context, statusPlan api.Plan, pg pl
|
|||
planAction.Type.String(), pg.Type()).Set(0.0)
|
||||
|
||||
actionsFailedMetrics.WithLabelValues(d.context.GetName(), planAction.Type.String(), pg.Type()).Inc()
|
||||
return nil, true, nil
|
||||
return nil, true, false, nil
|
||||
}
|
||||
|
||||
if done {
|
||||
|
@ -232,19 +268,19 @@ func (d *Reconciler) executePlan(ctx context.Context, statusPlan api.Plan, pg pl
|
|||
d.planLogger.Info("Reloading cached status")
|
||||
if err := c.Refresh(ctx); err != nil {
|
||||
d.planLogger.Err(err).Warn("Unable to reload cached status")
|
||||
return plan, recall, nil
|
||||
return plan, recall, false, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if newPlan, changed := getActionPlanAppender(action, plan); changed {
|
||||
// Our actions have been added to the end of plan
|
||||
return newPlan, true, nil
|
||||
return newPlan, false, true, nil
|
||||
}
|
||||
|
||||
if err := getActionPost(action, ctx); err != nil {
|
||||
d.planLogger.Err(err).Error("Post action failed")
|
||||
return nil, false, errors.WithStack(err)
|
||||
return nil, false, false, errors.WithStack(err)
|
||||
}
|
||||
} else {
|
||||
if !plan[0].IsStarted() {
|
||||
|
@ -258,13 +294,14 @@ func (d *Reconciler) executePlan(ctx context.Context, statusPlan api.Plan, pg pl
|
|||
|
||||
plan[0].Locals.Merge(actionContext.CurrentLocals())
|
||||
|
||||
return plan, recall, nil
|
||||
return plan, recall, false, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Reconciler) executeAction(ctx context.Context, planAction api.Action, action Action) (done, abort, callAgain, retry bool, err error) {
|
||||
log := d.planLogger.Str("action", string(planAction.Type)).Str("member", planAction.MemberID)
|
||||
log.Info("Executing action")
|
||||
|
||||
if !planAction.IsStarted() {
|
||||
// Not started yet
|
||||
|
@ -327,7 +364,7 @@ func (d *Reconciler) executeAction(ctx context.Context, planAction api.Action, a
|
|||
|
||||
// createAction create action object based on action type
|
||||
func (d *Reconciler) createAction(action api.Action) (Action, ActionContext) {
|
||||
actionCtx := newActionContext(d.log, d.context)
|
||||
actionCtx := newActionContext(d.log, d.context, &d.metrics)
|
||||
|
||||
f, ok := getActionFactory(action.Type)
|
||||
if !ok {
|
||||
|
|
|
@ -39,6 +39,8 @@ type Reconciler struct {
|
|||
log logging.Logger
|
||||
planLogger logging.Logger
|
||||
context Context
|
||||
|
||||
metrics Metrics
|
||||
}
|
||||
|
||||
// NewReconciler creates a new reconciler with given context.
|
||||
|
|
|
@ -34,6 +34,6 @@ func ArangodbOperatorAgencyErrors() metrics.Description {
|
|||
return arangodbOperatorAgencyErrors
|
||||
}
|
||||
|
||||
func ArangodbOperatorAgencyErrorsCount(value float64, namespace string, name string) metrics.Metric {
|
||||
func ArangodbOperatorAgencyErrorsCounter(value float64, namespace string, name string) metrics.Metric {
|
||||
return ArangodbOperatorAgencyErrors().Gauge(value, namespace, name)
|
||||
}
|
||||
|
|
|
@ -34,6 +34,6 @@ func ArangodbOperatorAgencyFetches() metrics.Description {
|
|||
return arangodbOperatorAgencyFetches
|
||||
}
|
||||
|
||||
func ArangodbOperatorAgencyFetchesCount(value float64, namespace string, name string) metrics.Metric {
|
||||
func ArangodbOperatorAgencyFetchesCounter(value float64, namespace string, name string) metrics.Metric {
|
||||
return ArangodbOperatorAgencyFetches().Gauge(value, namespace, name)
|
||||
}
|
||||
|
|
|
@ -18,4 +18,22 @@
|
|||
// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
|
||||
package agency
|
||||
package metric_descriptions
|
||||
|
||||
import "github.com/arangodb/kube-arangodb/pkg/util/metrics"
|
||||
|
||||
var (
|
||||
arangodbOperatorRebalancerEnabled = metrics.NewDescription("arangodb_operator_rebalancer_enabled", "Determines if rebalancer is enabled", []string{`namespace`, `name`}, nil)
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerDescription(arangodbOperatorRebalancerEnabled)
|
||||
}
|
||||
|
||||
func ArangodbOperatorRebalancerEnabled() metrics.Description {
|
||||
return arangodbOperatorRebalancerEnabled
|
||||
}
|
||||
|
||||
func ArangodbOperatorRebalancerEnabledGauge(value float64, namespace string, name string) metrics.Metric {
|
||||
return ArangodbOperatorRebalancerEnabled().Gauge(value, namespace, name)
|
||||
}
|
39
pkg/generated/metric_descriptions/arangodb_operator_rebalancer_moves_current.go
generated
Normal file
39
pkg/generated/metric_descriptions/arangodb_operator_rebalancer_moves_current.go
generated
Normal file
|
@ -0,0 +1,39 @@
|
|||
//
|
||||
// DISCLAIMER
|
||||
//
|
||||
// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
|
||||
package metric_descriptions
|
||||
|
||||
import "github.com/arangodb/kube-arangodb/pkg/util/metrics"
|
||||
|
||||
var (
|
||||
arangodbOperatorRebalancerMovesCurrent = metrics.NewDescription("arangodb_operator_rebalancer_moves_current", "Define how many moves are currently in progress", []string{`namespace`, `name`}, nil)
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerDescription(arangodbOperatorRebalancerMovesCurrent)
|
||||
}
|
||||
|
||||
func ArangodbOperatorRebalancerMovesCurrent() metrics.Description {
|
||||
return arangodbOperatorRebalancerMovesCurrent
|
||||
}
|
||||
|
||||
func ArangodbOperatorRebalancerMovesCurrentGauge(value float64, namespace string, name string) metrics.Metric {
|
||||
return ArangodbOperatorRebalancerMovesCurrent().Gauge(value, namespace, name)
|
||||
}
|
39
pkg/generated/metric_descriptions/arangodb_operator_rebalancer_moves_failed.go
generated
Normal file
39
pkg/generated/metric_descriptions/arangodb_operator_rebalancer_moves_failed.go
generated
Normal file
|
@ -0,0 +1,39 @@
|
|||
//
|
||||
// DISCLAIMER
|
||||
//
|
||||
// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
|
||||
package metric_descriptions
|
||||
|
||||
import "github.com/arangodb/kube-arangodb/pkg/util/metrics"
|
||||
|
||||
var (
|
||||
arangodbOperatorRebalancerMovesFailed = metrics.NewDescription("arangodb_operator_rebalancer_moves_failed", "Define how many moves failed", []string{`namespace`, `name`}, nil)
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerDescription(arangodbOperatorRebalancerMovesFailed)
|
||||
}
|
||||
|
||||
func ArangodbOperatorRebalancerMovesFailed() metrics.Description {
|
||||
return arangodbOperatorRebalancerMovesFailed
|
||||
}
|
||||
|
||||
func ArangodbOperatorRebalancerMovesFailedCounter(value float64, namespace string, name string) metrics.Metric {
|
||||
return ArangodbOperatorRebalancerMovesFailed().Gauge(value, namespace, name)
|
||||
}
|
39
pkg/generated/metric_descriptions/arangodb_operator_rebalancer_moves_generated.go
generated
Normal file
39
pkg/generated/metric_descriptions/arangodb_operator_rebalancer_moves_generated.go
generated
Normal file
|
@ -0,0 +1,39 @@
|
|||
//
|
||||
// DISCLAIMER
|
||||
//
|
||||
// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
|
||||
package metric_descriptions
|
||||
|
||||
import "github.com/arangodb/kube-arangodb/pkg/util/metrics"
|
||||
|
||||
var (
|
||||
arangodbOperatorRebalancerMovesGenerated = metrics.NewDescription("arangodb_operator_rebalancer_moves_generated", "Define how many moves were generated", []string{`namespace`, `name`}, nil)
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerDescription(arangodbOperatorRebalancerMovesGenerated)
|
||||
}
|
||||
|
||||
func ArangodbOperatorRebalancerMovesGenerated() metrics.Description {
|
||||
return arangodbOperatorRebalancerMovesGenerated
|
||||
}
|
||||
|
||||
func ArangodbOperatorRebalancerMovesGeneratedCounter(value float64, namespace string, name string) metrics.Metric {
|
||||
return ArangodbOperatorRebalancerMovesGenerated().Gauge(value, namespace, name)
|
||||
}
|
39
pkg/generated/metric_descriptions/arangodb_operator_rebalancer_moves_succeeded.go
generated
Normal file
39
pkg/generated/metric_descriptions/arangodb_operator_rebalancer_moves_succeeded.go
generated
Normal file
|
@ -0,0 +1,39 @@
|
|||
//
|
||||
// DISCLAIMER
|
||||
//
|
||||
// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Copyright holder is ArangoDB GmbH, Cologne, Germany
|
||||
//
|
||||
|
||||
package metric_descriptions
|
||||
|
||||
import "github.com/arangodb/kube-arangodb/pkg/util/metrics"
|
||||
|
||||
var (
|
||||
arangodbOperatorRebalancerMovesSucceeded = metrics.NewDescription("arangodb_operator_rebalancer_moves_succeeded", "Define how many moves succeeded", []string{`namespace`, `name`}, nil)
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerDescription(arangodbOperatorRebalancerMovesSucceeded)
|
||||
}
|
||||
|
||||
func ArangodbOperatorRebalancerMovesSucceeded() metrics.Description {
|
||||
return arangodbOperatorRebalancerMovesSucceeded
|
||||
}
|
||||
|
||||
func ArangodbOperatorRebalancerMovesSucceededCounter(value float64, namespace string, name string) metrics.Metric {
|
||||
return ArangodbOperatorRebalancerMovesSucceeded().Gauge(value, namespace, name)
|
||||
}
|
Loading…
Reference in a new issue