diff --git a/CHANGELOG.md b/CHANGELOG.md index fed54a2fb..657cc2f88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ - (Bugfix) Fix ErrorArray String function - (Feature) Switch services to Port names - (Feature) Configurable ArangoD Port +- (Feature) Allow to exclude metrics ## [1.2.20](https://github.com/arangodb/kube-arangodb/tree/1.2.20) (2022-10-25) - (Feature) Add action progress diff --git a/cmd/cmd.go b/cmd/cmd.go index 7ccb96606..8fb4e86a3 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -53,6 +53,7 @@ import ( "github.com/arangodb/kube-arangodb/pkg/deployment/features" "github.com/arangodb/kube-arangodb/pkg/generated/clientset/versioned/scheme" "github.com/arangodb/kube-arangodb/pkg/logging" + "github.com/arangodb/kube-arangodb/pkg/metrics/collector" "github.com/arangodb/kube-arangodb/pkg/operator" "github.com/arangodb/kube-arangodb/pkg/operator/scope" "github.com/arangodb/kube-arangodb/pkg/server" @@ -63,6 +64,7 @@ import ( operatorHTTP "github.com/arangodb/kube-arangodb/pkg/util/http" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" "github.com/arangodb/kube-arangodb/pkg/util/kclient" + "github.com/arangodb/kube-arangodb/pkg/util/metrics" "github.com/arangodb/kube-arangodb/pkg/util/probe" "github.com/arangodb/kube-arangodb/pkg/util/retry" "github.com/arangodb/kube-arangodb/pkg/version" @@ -161,6 +163,9 @@ var ( chaosOptions struct { allowed bool } + metricsOptions struct { + excludedMetricPrefixes []string + } livenessProbe probe.LivenessProbe deploymentProbe probe.ReadyProbe deploymentReplicationProbe probe.ReadyProbe @@ -214,6 +219,7 @@ func init() { f.BoolVar(&crdOptions.install, "crd.install", true, "Install missing CRD if access is possible") f.IntVar(&operatorBackup.concurrentUploads, "backup-concurrent-uploads", globals.DefaultBackupConcurrentUploads, "Number of concurrent uploads per deployment") f.Uint64Var(&memoryLimit.hardLimit, "memory-limit", 0, "Define memory limit for hard shutdown and the dump of goroutines. 
Used for testing") + f.StringArrayVar(&metricsOptions.excludedMetricPrefixes, "metrics.excluded-prefixes", nil, "List of the excluded metrics prefixes") if err := features.Init(&cmdMain); err != nil { panic(err.Error()) } @@ -253,6 +259,8 @@ func executeMain(cmd *cobra.Command, args []string) { globals.GetGlobals().Kubernetes().RequestBatchSize().Set(operatorKubernetesOptions.maxBatchSize) globals.GetGlobals().Backup().ConcurrentUploads().Set(operatorBackup.concurrentUploads) + collector.GetCollector().SetFilter(metrics.NegateMetricPushFilter(metrics.NewPrefixMetricPushFilter(metricsOptions.excludedMetricPrefixes...))) + kclient.SetDefaultQPS(operatorKubernetesOptions.qps) kclient.SetDefaultBurst(operatorKubernetesOptions.burst) diff --git a/pkg/deployment/old_metrics.go b/pkg/deployment/old_metrics.go index c40d16088..7af088e56 100644 --- a/pkg/deployment/old_metrics.go +++ b/pkg/deployment/old_metrics.go @@ -23,11 +23,10 @@ package deployment import ( "sync" - "github.com/prometheus/client_golang/prometheus" - api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1" "github.com/arangodb/kube-arangodb/pkg/deployment/features" "github.com/arangodb/kube-arangodb/pkg/generated/metric_descriptions" + "github.com/arangodb/kube-arangodb/pkg/metrics/collector" "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector/definitions" "github.com/arangodb/kube-arangodb/pkg/util/metrics" ) @@ -49,13 +48,12 @@ func init() { operatorStateRefreshMetric: metrics.NewDescription("arango_operator_deployment_state_refresh_count", "Number of refreshes in deployment", []string{"namespace", "deployment", "type"}, nil), } - prometheus.MustRegister(&localInventory) + collector.GetCollector().RegisterMetric(&localInventory) + collector.GetCollector().RegisterDescription(&localInventory) } var localInventory inventory -var _ prometheus.Collector = &inventory{} - type inventory struct { lock sync.Mutex deployments map[string]map[string]*Deployment @@ -65,32 +63,24 @@ type inventory 
struct {
 	operatorStateRefreshMetric metrics.Description
 }
 
-func (i *inventory) Describe(descs chan<- *prometheus.Desc) {
-	i.lock.Lock()
-	defer i.lock.Unlock()
+// CollectDescriptions pushes the descriptors of the inventory metrics into in and
+// then passes in to metric_descriptions.Descriptions.
+func (i *inventory) CollectDescriptions(in metrics.PushDescription) {
+	in.Push(i.deploymentsMetric, i.deploymentMetricsMembersMetric, i.deploymentAgencyStateMetric, i.deploymentShardLeadersMetric, i.deploymentShardsMetric, i.operatorStateRefreshMetric)
 
-	pd := metrics.NewPushDescription(descs)
-	pd.Push(i.deploymentsMetric, i.deploymentMetricsMembersMetric, i.deploymentAgencyStateMetric, i.deploymentShardLeadersMetric, i.deploymentShardsMetric, i.operatorStateRefreshMetric)
-
-	metric_descriptions.Descriptions(pd)
+	metric_descriptions.Descriptions(in)
 }
 
-func (i *inventory) Collect(m chan<- prometheus.Metric) {
+// CollectMetrics pushes the current metrics of every tracked deployment into in.
+//
+// i.lock is held for the whole call so the i.deployments map is never ranged over
+// without synchronization (the shared collector's own mutex does not guard it).
+func (i *inventory) CollectMetrics(in metrics.PushMetric) {
 	i.lock.Lock()
 	defer i.lock.Unlock()
 
-	p := metrics.NewPushMetric(m)
 	for _, deployments := range i.deployments {
 		for _, deployment := range deployments {
-			p.Push(i.deploymentsMetric.Gauge(1, deployment.GetNamespace(), deployment.GetName()))
+			in.Push(i.deploymentsMetric.Gauge(1, deployment.GetNamespace(), deployment.GetName()))
 
-			deployment.CollectMetrics(p)
+			deployment.CollectMetrics(in)
 
 			if state := deployment.acs.CurrentClusterCache(); state != nil {
 				t := state.GetThrottles()
 
 				for _, c := range definitions.AllComponents() {
-					p.Push(i.operatorStateRefreshMetric.Gauge(float64(t.Get(c).Count()), deployment.GetNamespace(), deployment.GetName(), string(c)))
+					in.Push(i.operatorStateRefreshMetric.Gauge(float64(t.Get(c).Count()), deployment.GetNamespace(), deployment.GetName(), string(c)))
 				}
 			}
 
@@ -98,17 +97,17 @@ func (i *inventory) Collect(m chan<- prometheus.Metric) {
 			status := deployment.GetStatus()
 
 			for _, member := range status.Members.AsList() {
-				p.Push(i.deploymentMetricsMembersMetric.Gauge(1, deployment.GetNamespace(), deployment.GetName(), member.Group.AsRole(), member.Member.ID))
+
in.Push(i.deploymentMetricsMembersMetric.Gauge(1, deployment.GetNamespace(), deployment.GetName(), member.Group.AsRole(), member.Member.ID)) } if spec.Mode.Get().HasAgents() { agency, agencyOk := deployment.GetAgencyCache() if !agencyOk { - p.Push(i.deploymentAgencyStateMetric.Gauge(0, deployment.GetNamespace(), deployment.GetName())) + in.Push(i.deploymentAgencyStateMetric.Gauge(0, deployment.GetNamespace(), deployment.GetName())) continue } - p.Push(i.deploymentAgencyStateMetric.Gauge(1, deployment.GetNamespace(), deployment.GetName())) + in.Push(i.deploymentAgencyStateMetric.Gauge(1, deployment.GetNamespace(), deployment.GetName())) if spec.Mode.Get() == api.DeploymentModeCluster { for db, collections := range agency.Current.Collections { @@ -145,9 +135,9 @@ func (i *inventory) Collect(m chan<- prometheus.Metric) { } if id == 0 { - p.Push(i.deploymentShardLeadersMetric.Gauge(1, m...)) + in.Push(i.deploymentShardLeadersMetric.Gauge(1, m...)) } - p.Push(i.deploymentShardsMetric.Gauge(1, m...)) + in.Push(i.deploymentShardsMetric.Gauge(1, m...)) } } } diff --git a/pkg/metrics/collector/collector.go b/pkg/metrics/collector/collector.go new file mode 100644 index 000000000..709aead06 --- /dev/null +++ b/pkg/metrics/collector/collector.go @@ -0,0 +1,102 @@ +// +// DISCLAIMER +// +// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+// Copyright holder is ArangoDB GmbH, Cologne, Germany
+//
+
+package collector
+
+import (
+	"sync"
+
+	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/arangodb/kube-arangodb/pkg/util/metrics"
+)
+
+// Collector lets other packages register metric and description sources with the
+// shared collector and set the filter applied to the metrics it pushes.
+type Collector interface {
+	RegisterMetric(m metrics.MCollector)
+	RegisterDescription(m metrics.DCollector)
+
+	SetFilter(filter metrics.MetricPushFilter)
+}
+
+// init registers the shared collector with Prometheus; MustRegister panics if
+// the registration fails.
+func init() {
+	prometheus.MustRegister(collectorObject)
+}
+
+// GetCollector returns the package-level shared collector.
+func GetCollector() Collector {
+	return collectorObject
+}
+
+var collectorObject = &collector{}
+
+// collector fans Describe and Collect calls out to the registered sources.
+// All fields are guarded by lock.
+type collector struct {
+	lock sync.Mutex
+
+	// filter, when non-nil, decides which pushed metrics are forwarded.
+	filter metrics.MetricPushFilter
+
+	metrics      []metrics.MCollector
+	descriptions []metrics.DCollector
+}
+
+// SetFilter replaces the metric filter; a nil filter disables filtering.
+func (p *collector) SetFilter(filter metrics.MetricPushFilter) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	p.filter = filter
+}
+
+// RegisterDescription adds m to the sources asked for descriptions in Describe.
+func (p *collector) RegisterDescription(m metrics.DCollector) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	p.descriptions = append(p.descriptions, m)
+}
+
+// RegisterMetric adds m to the sources asked for metrics in Collect.
+func (p *collector) RegisterMetric(m metrics.MCollector) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	p.metrics = append(p.metrics, m)
+}
+
+// Describe forwards descs to every registered description source.
+//
+// The sources run while p.lock is held, so they must not call back into
+// RegisterDescription, RegisterMetric or SetFilter.
+func (p *collector) Describe(descs chan<- *prometheus.Desc) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	out := metrics.NewPushDescription(descs)
+
+	for _, d := range p.descriptions {
+		d.CollectDescriptions(out)
+	}
+}
+
+// Collect forwards c to every registered metric source, wrapping it in the
+// configured filter when one is set.
+//
+// The sources run while p.lock is held, so they must not call back into
+// RegisterDescription, RegisterMetric or SetFilter.
+func (p *collector) Collect(c chan<- prometheus.Metric) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	out := metrics.NewPushMetric(c)
+
+	if f := p.filter; f != nil {
+		out = metrics.NewMetricsPushFilter(out, f)
+	}
+
+	for _, m := range p.metrics {
+		m.CollectMetrics(out)
+	}
+}
diff --git a/pkg/util/errors/panics/metric.go b/pkg/util/errors/panics/metric.go
index e348b9dea..679ec9d7c 100644
--- a/pkg/util/errors/panics/metric.go
+++ b/pkg/util/errors/panics/metric.go
@@ -23,15 +23,14 @@ package panics
 import (
 	"sync"
 
-	"github.com/prometheus/client_golang/prometheus"
-
 	"github.com/arangodb/kube-arangodb/pkg/generated/metric_descriptions"
 	"github.com/arangodb/kube-arangodb/pkg/logging"
+	"github.com/arangodb/kube-arangodb/pkg/metrics/collector"
 	"github.com/arangodb/kube-arangodb/pkg/util/metrics"
 )
 
 func init() {
-	prometheus.MustRegister(panicsReceived)
+	collector.GetCollector().RegisterMetric(panicsReceived)
 }
 
 var (
@@ -45,18 +44,16 @@ type panicsReceiver struct {
 	lock sync.Mutex
 }
 
-func (p *panicsReceiver) Describe(descs chan<- *prometheus.Desc) {
-
-}
-
-func (p *panicsReceiver) Collect(c chan<- prometheus.Metric) {
+// CollectMetrics pushes one recovered-panics counter per key of p.panics into in.
+//
+// p.lock is held while the map is ranged over, so the iteration cannot overlap
+// with an update made under the same lock.
+func (p *panicsReceiver) CollectMetrics(in metrics.PushMetric) {
 	p.lock.Lock()
 	defer p.lock.Unlock()
 
-	out := metrics.NewPushMetric(c)
-
 	for k, v := range p.panics {
-		out.Push(metric_descriptions.ArangodbOperatorEnginePanicsRecoveredCounter(float64(v), k))
+		in.Push(metric_descriptions.ArangodbOperatorEnginePanicsRecoveredCounter(float64(v), k))
 	}
 }
 
diff --git a/pkg/util/metrics/collector.go b/pkg/util/metrics/collector.go
index 346960823..ce1c6aab6 100644
--- a/pkg/util/metrics/collector.go
+++ b/pkg/util/metrics/collector.go
@@ -20,8 +20,18 @@
 
 package metrics
 
-import "github.com/prometheus/client_golang/prometheus"
+// MCollector is implemented by sources that push their metrics into in on request.
+type MCollector interface {
+	CollectMetrics(in PushMetric)
+}
+
+// DCollector is implemented by sources that push their metric descriptions into in on request.
+type DCollector interface {
+	CollectDescriptions(in PushDescription)
+}
 
+// Collector combines MCollector and DCollector.
 type Collector interface {
-	Collect(description Description, metrics chan<- prometheus.Metric)
+	MCollector
+	DCollector
 }
diff --git a/pkg/util/metrics/push_filter.go b/pkg/util/metrics/push_filter.go
new file mode 100644
index 000000000..aef54a685
--- /dev/null
+++ b/pkg/util/metrics/push_filter.go
@@ -0,0 +1,68 @@
+//
+// DISCLAIMER
+//
+// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright holder is ArangoDB GmbH, Cologne, Germany
+//
+
+package metrics
+
+// MetricPushFilter reports whether metric m should be forwarded (true) or
+// dropped (false).
+type MetricPushFilter func(m Metric) bool
+
+// NegateMetricPushFilter returns a filter that forwards exactly the metrics
+// that in drops. in must not be nil: the returned filter calls it on every
+// metric.
+func NegateMetricPushFilter(in MetricPushFilter) MetricPushFilter {
+	return func(m Metric) bool {
+		return !in(m)
+	}
+}
+
+// MergeMetricPushFilter returns a filter that forwards a metric only when every
+// non-nil filter in filters forwards it. Nil entries are skipped, so merging
+// nothing (or only nil filters) forwards everything.
+func MergeMetricPushFilter(filters ...MetricPushFilter) MetricPushFilter {
+	return func(m Metric) bool {
+		for _, f := range filters {
+			if f == nil {
+				continue
+			}
+			if !f(m) {
+				return false
+			}
+		}
+
+		return true
+	}
+}
+
+// metricPushFilter is a PushMetric that forwards to out only the metrics
+// accepted by filter.
+type metricPushFilter struct {
+	filter MetricPushFilter
+
+	out PushMetric
+}
+
+// Push forwards every metric of desc that the filter accepts and returns the
+// filtering pusher itself, so chained pushes stay filtered.
+func (m metricPushFilter) Push(desc ...Metric) PushMetric {
+	for id := range desc {
+		if m.filter(desc[id]) {
+			m.out.Push(desc[id])
+		}
+	}
+
+	return m
+}
+
+// NewMetricsPushFilter wraps out so that only metrics accepted by filter reach
+// it. A nil filter means "no filtering": out is returned unchanged instead of
+// a wrapper that would panic on the first Push.
+func NewMetricsPushFilter(out PushMetric, filter MetricPushFilter) PushMetric {
+	if filter == nil {
+		return out
+	}
+
+	return &metricPushFilter{
+		filter: filter,
+		out:    out,
+	}
+}
diff --git a/pkg/util/metrics/push_filter_prefix.go b/pkg/util/metrics/push_filter_prefix.go
new file mode 100644
index 000000000..88d8eef2f
--- /dev/null
+++ b/pkg/util/metrics/push_filter_prefix.go
@@ -0,0 +1,40 @@
+//
+// DISCLAIMER
+//
+// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright holder is ArangoDB GmbH, Cologne, Germany
+//
+
+package metrics
+
+import (
+	"fmt"
+	"strings"
+)
+
+// NewPrefixMetricPushFilter returns a filter that accepts a metric when the
+// string form of its descriptor starts with `Desc{fqName: "` followed by one of
+// prefixes. Without prefixes the filter accepts nothing.
+//
+// NOTE(review): the match relies on the text returned by m.Desc().String()
+// beginning with the fully-qualified metric name in exactly that layout -
+// confirm against the Prometheus client version in use.
+func NewPrefixMetricPushFilter(prefixes ...string) MetricPushFilter {
+	// Format the needles once here instead of once per prefix for every metric
+	// that is pushed. This also detaches the filter from the caller's slice.
+	needles := make([]string, len(prefixes))
+	for id := range prefixes {
+		needles[id] = fmt.Sprintf(`Desc{fqName: "%s`, prefixes[id])
+	}
+
+	return func(m Metric) bool {
+		if len(needles) == 0 {
+			return false
+		}
+
+		// Render the descriptor once per metric, not once per prefix.
+		desc := m.Desc().String()
+
+		for _, needle := range needles {
+			if strings.HasPrefix(desc, needle) {
+				return true
+			}
+		}
+
+		return false
+	}
+}
diff --git a/pkg/util/metrics/push_filter_test.go b/pkg/util/metrics/push_filter_test.go
new file mode 100644
index 000000000..f77ca3294
--- /dev/null
+++ b/pkg/util/metrics/push_filter_test.go
@@ -0,0 +1,97 @@
+//
+// DISCLAIMER
+//
+// Copyright 2016-2022 ArangoDB GmbH, Cologne, Germany
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright holder is ArangoDB GmbH, Cologne, Germany
+//
+
+package metrics
+
+import (
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/stretchr/testify/require"
+)
+
+// Test_PushFilter pushes the same four gauges through differently wrapped
+// pushers and checks how many of them arrive on the channel.
+func Test_PushFilter(t *testing.T) {
+	descriptions := []Description{
+		NewDescription("arangodb_a1_metric1", "", []string{}, nil),
+		NewDescription("arangodb_a1_metric2", "", []string{}, nil),
+		NewDescription("arangodb_a2_metric1", "", []string{}, nil),
+		NewDescription("arangodb_a2_metric2", "", []string{}, nil),
+	}
+
+	// push sends one gauge per description in a single Push call.
+	push := func(in PushMetric) {
+		batch := make([]Metric, 0, len(descriptions))
+		for _, d := range descriptions {
+			batch = append(batch, d.Gauge(1))
+		}
+
+		in.Push(batch...)
+	}
+
+	// unfiltered hands the raw pusher through; filteredBy wraps it in filter f.
+	unfiltered := func(out PushMetric) PushMetric {
+		return out
+	}
+	filteredBy := func(f MetricPushFilter) func(PushMetric) PushMetric {
+		return func(out PushMetric) PushMetric {
+			return NewMetricsPushFilter(out, f)
+		}
+	}
+
+	testCases := []struct {
+		name     string
+		wrap     func(PushMetric) PushMetric
+		expected int
+	}{
+		{"AllAccepted", unfiltered, 4},
+		{"Filter - AcceptAll", filteredBy(func(m Metric) bool { return true }), 4},
+		{"Filter - Prefix - Empty", filteredBy(NewPrefixMetricPushFilter()), 0},
+		{"Filter - Prefix - Match one", filteredBy(NewPrefixMetricPushFilter("arangodb_a2_metric1")), 1},
+		{"Filter - Prefix - Match two", filteredBy(NewPrefixMetricPushFilter("arangodb_a2_metric1", "arangodb_a1_metric1")), 2},
+		{"Filter - Prefix - Match multi", filteredBy(NewPrefixMetricPushFilter("arangodb_a2_")), 2},
+		{"Filter - Prefix - Match one - Negate", filteredBy(NegateMetricPushFilter(NewPrefixMetricPushFilter("arangodb_a2_metric1"))), 3},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+
+		t.Run(tc.name, func(t *testing.T) {
+			c := make(chan prometheus.Metric, 1024)
+
+			push(tc.wrap(NewPushMetric(c)))
+
+			require.Len(t, c, tc.expected)
+		})
+	}
+}