mirror of
https://github.com/prometheus-operator/prometheus-operator.git
synced 2025-04-16 01:06:27 +00:00
* Introduce PrometheusAgent CRD: the operator is able to run with PrometheusAgent resources in the cluster, but doesn't do anything with them yet. This is the first step to implementing the Prometheus Agent Operator.
* Re-enable configmap and secret informers. (cherry picked from commit 1a71db03db6b41cd0cee9d0193b6ea3884bb5bae)
* Implement Resolve for the Agent operator. (cherry picked from commit 49558165b9178b6c1bda833a48f7bfe1468c942a)
* Operator is able to create the Agent StatefulSet. (cherry picked from commit 7a3826683c92f917312c866a2bb6401dc54b95f2)
* Agent Operator creates the secret from ServiceMonitors. (cherry picked from commit 11232669befb4de9d0765dfadfe5fae00b575f11)
* Agent Operator creates the secret from PodMonitors. (cherry picked from commit 5ae551734bac2babc056c86443d15729d43d12b0)
* Agent Operator creates the secret from Probes. (cherry picked from commit 9637612fbbe9617335fd6188271ebf2cc74a3693)
* Agent Operator configures remote-write. (cherry picked from commit c4bdf230d527e19f8b77ca5f938b9254ed344f7d)
* Agent Operator configures additionalScrapeConfigs. (cherry picked from commit d9f28db764641e682bf4fe8963310f791979c387)
* Implement UpdateStatus. (cherry picked from commit c546ecaf3e8b73916df44a8f48b279c6988e32f5)
* Add resource handlers. (cherry picked from commit 5b83359445e20f88ea5fff80302fce62d58058b9)
* make format (cherry picked from commit 6507964ba28f4ebf32ce3203db752444e288c45d)
* Only start the agent operator if there are enough permissions.
* Remove node endpoint synchronization from the agent operator; the server operator already handles it.
* Move the PrometheusAgent API from v1 to v1alpha1.
* pkg/prometheus/agent/statefulset.go: fix image concatenation.
* Avoid name collisions between Prometheus Agents and Servers.
* agent/createOrUpdateConfigurationSecret: do not handle the case where the servicemonitor and podmonitor selectors are empty.
* make format
* make --always-make format generate
* Remove unused fields from the Operator struct.
* Add deployment mode as a new selector label for agent/server statefulsets.
* WIP: fix the OperatorUpgrade e2e test.
* Detect whether the PrometheusAgent CRD is installed or not: if the operator's service account has all permissions on the cluster but the CRD isn't installed, the PrometheusAgent controller would run but fail because of the absence of the CRD.
* Panic if type casting PrometheusInterface doesn't return Prometheus/Agent.
* Create a dedicated governing service for the Prometheus agent.

Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com>
Signed-off-by: ArthurSens <arthursens2005@gmail.com>
Signed-off-by: Simon Pasquier <spasquie@redhat.com>
Co-authored-by: Simon Pasquier <spasquie@redhat.com>
282 lines
8.8 KiB
Go
// Copyright 2016 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package prometheus

import (
	"context"
	"fmt"
	"reflect"
	"sort"
	"strconv"
	"strings"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/pkg/errors"
	"github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring"
	"github.com/prometheus-operator/prometheus-operator/pkg/operator"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	namespacelabeler "github.com/prometheus-operator/prometheus-operator/pkg/namespacelabeler"
	prompkg "github.com/prometheus-operator/prometheus-operator/pkg/prometheus"
)

// The maximum `Data` size of a ConfigMap seems to differ between
// environments. This is probably due to differences in metadata size, which
// counts towards the overall maximum size of a ConfigMap. Therefore, leave a
// large buffer.
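// v1.MaxSecretSize is 1 MiB in k8s.io/api/core/v1, so this caps the rule
// data per ConfigMap at roughly 512 KiB.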
var maxConfigMapDataSize = int(float64(v1.MaxSecretSize) * 0.5)

func (c *Operator) createOrUpdateRuleConfigMaps(ctx context.Context, p *monitoringv1.Prometheus) ([]string, error) {
	cClient := c.kclient.CoreV1().ConfigMaps(p.Namespace)

	namespaces, err := c.selectRuleNamespaces(p)
	if err != nil {
		return nil, err
	}

	excludedFromEnforcement := p.Spec.ExcludedFromEnforcement
	// append the deprecated PrometheusRulesExcludedFromEnforce
	for _, rule := range p.Spec.PrometheusRulesExcludedFromEnforce {
		excludedFromEnforcement = append(excludedFromEnforcement,
			monitoringv1.ObjectReference{
				Namespace: rule.RuleNamespace,
				Group:     monitoring.GroupName,
				Resource:  monitoringv1.PrometheusRuleName,
				Name:      rule.RuleName,
			})
	}
	nsLabeler := namespacelabeler.New(
		p.Spec.EnforcedNamespaceLabel,
		excludedFromEnforcement,
		true,
	)

	logger := log.With(c.logger, "prometheus", p.Name, "namespace", p.Namespace)
	promRuleSelector, err := operator.NewPrometheusRuleSelector(operator.PrometheusFormat, p.Spec.RuleSelector, nsLabeler, c.ruleInfs, logger)
	if err != nil {
		return nil, errors.Wrap(err, "initializing PrometheusRules failed")
	}

	newRules, rejected, err := promRuleSelector.Select(namespaces)
	if err != nil {
		return nil, errors.Wrap(err, "selecting PrometheusRules failed")
	}

	if pKey, ok := c.accessor.MetaNamespaceKey(p); ok {
		c.metrics.SetSelectedResources(pKey, monitoringv1.PrometheusRuleKind, len(newRules))
		c.metrics.SetRejectedResources(pKey, monitoringv1.PrometheusRuleKind, rejected)
	}

	currentConfigMapList, err := cClient.List(ctx, prometheusRulesConfigMapSelector(p.Name))
	if err != nil {
		return nil, err
	}
	currentConfigMaps := currentConfigMapList.Items

	currentRules := map[string]string{}
	for _, cm := range currentConfigMaps {
		for ruleFileName, ruleFile := range cm.Data {
			currentRules[ruleFileName] = ruleFile
		}
	}

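	// The len(currentConfigMaps) != 0 guard below ensures the initial
	// ConfigMap creation still happens when both the selected and deployed
	// rule sets are empty (and thus compare equal).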
	equal := reflect.DeepEqual(newRules, currentRules)
	if equal && len(currentConfigMaps) != 0 {
		level.Debug(c.logger).Log(
			"msg", "no PrometheusRule changes",
			"namespace", p.Namespace,
			"prometheus", p.Name,
		)
		currentConfigMapNames := []string{}
		for _, cm := range currentConfigMaps {
			currentConfigMapNames = append(currentConfigMapNames, cm.Name)
		}
		return currentConfigMapNames, nil
	}

	newConfigMaps, err := makeRulesConfigMaps(p, newRules)
	if err != nil {
		return nil, errors.Wrap(err, "failed to make rules ConfigMaps")
	}

	newConfigMapNames := []string{}
	for _, cm := range newConfigMaps {
		newConfigMapNames = append(newConfigMapNames, cm.Name)
	}

	if len(currentConfigMaps) == 0 {
		level.Debug(c.logger).Log(
			"msg", "no PrometheusRule configmap found, creating new one",
			"namespace", p.Namespace,
			"prometheus", p.Name,
		)
		for _, cm := range newConfigMaps {
			_, err = cClient.Create(ctx, &cm, metav1.CreateOptions{})
			if err != nil {
				return nil, errors.Wrapf(err, "failed to create ConfigMap '%v'", cm.Name)
			}
		}
		return newConfigMapNames, nil
	}

	// Simply deleting old ConfigMaps and creating new ones for now. Could be
	// replaced by logic that only deletes obsolete ConfigMaps in the future.
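	// Note that this leaves a short window in which the ConfigMap objects do
	// not exist. Volumes already mounted into the Prometheus pods keep their
	// last-synced content until the kubelet refreshes them, so the gap is
	// normally not visible to the running instance.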
	for _, cm := range currentConfigMaps {
		err := cClient.Delete(ctx, cm.Name, metav1.DeleteOptions{})
		if err != nil {
			return nil, errors.Wrapf(err, "failed to delete current ConfigMap '%v'", cm.Name)
		}
	}

	level.Debug(c.logger).Log(
		"msg", "updating PrometheusRule",
		"namespace", p.Namespace,
		"prometheus", p.Name,
	)
	for _, cm := range newConfigMaps {
		_, err = cClient.Create(ctx, &cm, metav1.CreateOptions{})
		if err != nil {
			return nil, errors.Wrapf(err, "failed to create new ConfigMap '%v'", cm.Name)
		}
	}

	return newConfigMapNames, nil
}

func prometheusRulesConfigMapSelector(prometheusName string) metav1.ListOptions {
	return metav1.ListOptions{LabelSelector: fmt.Sprintf("%v=%v", prompkg.LabelPrometheusName, prometheusName)}
}

func (c *Operator) selectRuleNamespaces(p *monitoringv1.Prometheus) ([]string, error) {
	namespaces := []string{}

	// If 'RuleNamespaceSelector' is nil, only check own namespace.
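	// A non-nil but empty selector, by contrast, converts to a selector that
	// matches everything, so all namespaces known to the operator become
	// candidates.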
	if p.Spec.RuleNamespaceSelector == nil {
		namespaces = append(namespaces, p.Namespace)
	} else {
		ruleNamespaceSelector, err := metav1.LabelSelectorAsSelector(p.Spec.RuleNamespaceSelector)
		if err != nil {
			return namespaces, errors.Wrap(err, "convert rule namespace label selector to selector")
		}

		namespaces, err = operator.ListMatchingNamespaces(ruleNamespaceSelector, c.nsMonInf)
		if err != nil {
			return nil, err
		}
	}

	level.Debug(c.logger).Log(
		"msg", "selected RuleNamespaces",
		"namespaces", strings.Join(namespaces, ","),
		"namespace", p.Namespace,
		"prometheus", p.Name,
	)

	return namespaces, nil
}

// makeRulesConfigMaps takes a Prometheus configuration and rule files and
// returns a list of Kubernetes ConfigMaps to be mounted into the Prometheus
// instance later on.
// If the total size of the rule files exceeds the Kubernetes ConfigMap limit,
// they are split up via a simple first-fit [1] bin packing algorithm (only
// the current bucket is ever checked, so it effectively degenerates to
// next-fit). In the future this can be replaced by a more sophisticated
// algorithm, but for now simplicity should be sufficient.
// [1] https://en.wikipedia.org/wiki/Bin_packing_problem#First-fit_algorithm
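// As an illustration (file sizes are made up): with a ~512 KiB budget per
// bucket and sorted files a.yaml (300 KiB), b.yaml (300 KiB) and c.yaml
// (100 KiB), a.yaml lands in bucket 0, b.yaml does not fit there and opens
// bucket 1, and c.yaml is appended to bucket 1, yielding two ConfigMaps.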
func makeRulesConfigMaps(p *monitoringv1.Prometheus, ruleFiles map[string]string) ([]v1.ConfigMap, error) {
	// Check that none of the rule files is too large for a single ConfigMap.
	for filename, file := range ruleFiles {
		if len(file) > maxConfigMapDataSize {
			return nil, errors.Errorf(
				"rule file '%v' is too large for a single Kubernetes ConfigMap",
				filename,
			)
		}
	}

	buckets := []map[string]string{
		{},
	}
	currBucketIndex := 0

	// To make the bin packing algorithm deterministic, sort the rule file
	// names and iterate over them instead of ranging over the ruleFiles map
	// (map iteration order is not deterministic).
	fileNames := []string{}
	for n := range ruleFiles {
		fileNames = append(fileNames, n)
	}
	sort.Strings(fileNames)

	for _, filename := range fileNames {
		// If the rule file doesn't fit into the current bucket, create a new bucket.
		if bucketSize(buckets[currBucketIndex])+len(ruleFiles[filename]) > maxConfigMapDataSize {
			buckets = append(buckets, map[string]string{})
			currBucketIndex++
		}
		buckets[currBucketIndex][filename] = ruleFiles[filename]
	}

	ruleFileConfigMaps := []v1.ConfigMap{}
	for i, bucket := range buckets {
		cm := makeRulesConfigMap(p, bucket)
		cm.Name = cm.Name + "-" + strconv.Itoa(i)
		ruleFileConfigMaps = append(ruleFileConfigMaps, cm)
	}

	return ruleFileConfigMaps, nil
}

func bucketSize(bucket map[string]string) int {
	totalSize := 0
	for _, v := range bucket {
		totalSize += len(v)
	}

	return totalSize
}

func makeRulesConfigMap(p *monitoringv1.Prometheus, ruleFiles map[string]string) v1.ConfigMap {
	boolTrue := true

	labels := map[string]string{prompkg.LabelPrometheusName: p.Name}
	for k, v := range prompkg.ManagedByOperatorLabels {
		labels[k] = v
	}

	return v1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:   prometheusRuleConfigMapName(p.Name),
			Labels: labels,
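			// Owning the ConfigMap via a controller reference lets the
			// Kubernetes garbage collector delete it automatically when the
			// Prometheus object itself is deleted.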
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion:         p.APIVersion,
					BlockOwnerDeletion: &boolTrue,
					Controller:         &boolTrue,
					Kind:               p.Kind,
					Name:               p.Name,
					UID:                p.UID,
				},
			},
		},
		Data: ruleFiles,
	}
}

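// prometheusRuleConfigMapName returns the base name of the rule ConfigMaps;
// for a (hypothetical) Prometheus object named "k8s" this is
// "prometheus-k8s-rulefiles", to which makeRulesConfigMaps appends the
// bucket index ("-0", "-1", ...).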
func prometheusRuleConfigMapName(prometheusName string) string {
	return "prometheus-" + prometheusName + "-rulefiles"
}