1
0
Fork 0
mirror of https://github.com/prometheus-operator/prometheus-operator.git synced 2025-04-15 16:56:24 +00:00

prometheus: Enable live migration of rule configmaps to rule file crds

With this patch the Prometheus Operator checks if there are any
Kubernetes ConfigMaps inside the Prometheus namespace which fulfill the
`Spec.RuleSelector` requirement. If so, it creates a RuleFile for each
key in the `ConfigMap.Data` map inside the Prometheus namespace.
This commit is contained in:
Max Leonard Inden 2018-05-29 16:42:38 +02:00
parent e9b3f65411
commit afe9711ce2
No known key found for this signature in database
GPG key ID: 5403C5464810BC26
18 changed files with 417 additions and 31 deletions

View file

@ -49,7 +49,7 @@ AlertingSpec defines parameters for alerting configuration of Prometheus servers
## Alertmanager
Describes an Alertmanager cluster.
Alertmanager describes an Alertmanager cluster.
| Field | Description | Scheme | Required |
| ----- | ----------- | ------ | -------- |
@ -310,7 +310,7 @@ Rule describes an alerting or recording rule.
| record | | string | false |
| alert | | string | false |
| expr | | string | true |
| for | | time.Duration | false |
| for | | string | false |
| labels | | map[string]string | false |
| annotations | | map[string]string | false |
@ -355,7 +355,7 @@ RuleGroup is a list of sequentially evaluated recording and alerting rules.
| Field | Description | Scheme | Required |
| ----- | ----------- | ------ | -------- |
| name | | string | true |
| interval | | time.Duration | false |
| interval | | string | false |
| rules | | [][Rule](#rule) | true |
[Back to TOC](#table-of-contents)

2
Gopkg.lock generated
View file

@ -557,6 +557,6 @@
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "646697ee6f220efdf952fbc01b526aa9ffe02c19be761674332f6c0c037431f7"
inputs-digest = "a163ed58d0f9c69709808679f6106fbbc4720f28a81444e57d272f9baf87e0a5"
solver-name = "gps-cdcl"
solver-version = 1

View file

@ -45,8 +45,7 @@ e2e-test:
e2e-status:
kubectl get prometheus,alertmanager,servicemonitor,statefulsets,deploy,svc,endpoints,pods,cm,secrets,replicationcontrollers --all-namespaces
e2e:
$(MAKE) container
e2e: container
$(MAKE) e2e-test
e2e-helm:

View file

@ -0,0 +1,71 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus-operator
rules:
- apiGroups:
- extensions
resources:
- thirdpartyresources
verbs:
- '*'
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- '*'
- apiGroups:
- monitoring.coreos.com
resources:
- alertmanagers
- prometheuses
- prometheuses/finalizers
- alertmanagers/finalizers
- servicemonitors
- rulefiles
verbs:
- '*'
- apiGroups:
- apps
resources:
- statefulsets
verbs:
- '*'
- apiGroups:
- ""
resources:
- configmaps
- secrets
verbs:
- '*'
- apiGroups:
- ""
resources:
- pods
verbs:
- list
- delete
- apiGroups:
- ""
resources:
- services
- endpoints
verbs:
- get
- create
- update
- apiGroups:
- ""
resources:
- nodes
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- namespaces
verbs:
- list
- watch

View file

@ -0,0 +1,41 @@
apiVersion: apps/v1beta2
kind: Deployment
metadata:
labels:
k8s-app: prometheus-operator
name: prometheus-operator
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
k8s-app: prometheus-operator
template:
metadata:
labels:
k8s-app: prometheus-operator
spec:
containers:
- args:
- --kubelet-service=kube-system/kubelet
- --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:96d74644
- --log-level=all
image: quay.io/coreos/prometheus-operator:96d74644
name: prometheus-operator
ports:
- containerPort: 8080
name: http
resources:
limits:
cpu: 200m
memory: 100Mi
requests:
cpu: 100m
memory: 50Mi
nodeSelector:
beta.kubernetes.io/os: linux
securityContext:
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: prometheus-operator

View file

@ -11,7 +11,7 @@ spec:
scope: Namespaced
validation:
openAPIV3Schema:
description: Describes an Alertmanager cluster.
description: Alertmanager describes an Alertmanager cluster.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation

View file

@ -12,7 +12,7 @@ spec:
scope: Namespaced
validation:
openAPIV3Schema:
description: Describes an Alertmanager cluster.
description: Alertmanager describes an Alertmanager cluster.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation

View file

@ -305,8 +305,7 @@ spec:
and alerting rules.
properties:
interval:
format: int64
type: integer
type: string
name:
type: string
rules:
@ -320,8 +319,7 @@ spec:
expr:
type: string
for:
format: int64
type: integer
type: string
labels:
type: object
record:

View file

@ -56,7 +56,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.Alertmanager": {
Schema: spec.Schema{
SchemaProps: spec.SchemaProps{
Description: "Describes an Alertmanager cluster.",
Description: "Alertmanager describes an Alertmanager cluster.",
Properties: map[string]spec.Schema{
"kind": {
SchemaProps: spec.SchemaProps{
@ -1230,8 +1230,8 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
},
"for": {
SchemaProps: spec.SchemaProps{
Type: []string{"integer"},
Format: "int64",
Type: []string{"string"},
Format: "",
},
},
"labels": {
@ -1386,8 +1386,8 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
},
"interval": {
SchemaProps: spec.SchemaProps{
Type: []string{"integer"},
Format: "int64",
Type: []string{"string"},
Format: "",
},
},
"rules": {

View file

@ -15,8 +15,6 @@
package v1
import (
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
@ -440,9 +438,9 @@ type RuleFileSpec struct {
// RuleGroup is a list of sequentially evaluated recording and alerting rules.
// +k8s:openapi-gen=true
type RuleGroup struct {
Name string `json:"name"`
Interval time.Duration `json:"interval,omitempty"`
Rules []Rule `json:"rules"`
Name string `json:"name"`
Interval string `json:"interval,omitempty"`
Rules []Rule `json:"rules"`
}
// Rule describes an alerting or recording rule.
@ -451,12 +449,12 @@ type Rule struct {
Record string `json:"record,omitempty"`
Alert string `json:"alert,omitempty"`
Expr string `json:"expr"`
For time.Duration `json:"for,omitempty"`
For string `json:"for,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
}
// Describes an Alertmanager cluster.
// Alertmanager describes an Alertmanager cluster.
// +k8s:openapi-gen=true
type Alertmanager struct {
metav1.TypeMeta `json:",inline"`

View file

@ -730,6 +730,12 @@ func (c *Operator) sync(key string) error {
level.Info(c.logger).Log("msg", "sync prometheus", "key", key)
// TODO: Remove migration with Prometheus Operator v0.21.0
err = c.migrateRuleConfigMapsToRuleFileCRDs(p)
if err != nil {
return err
}
err = c.createOrUpdateRuleFileConfigMap(p)
if err != nil {
return err

View file

@ -0,0 +1,135 @@
// Copyright 2016 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package prometheus
import (
"bytes"
"strings"
monitoringv1 "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1"
"k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/tools/cache"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
)
// migrateRuleConfigMapsToRuleFileCRDs converts the rule ConfigMaps selected by
// p.Spec.RuleSelector in the namespace of p into RuleFile objects (one per
// ConfigMap data key) and creates them in that same namespace.
//
// A RuleFile that already exists is logged and skipped, so repeating the
// migration is not an error. Any other failure aborts the migration and is
// returned to the caller.
func (c *Operator) migrateRuleConfigMapsToRuleFileCRDs(p *monitoringv1.Prometheus) error {
	configMaps, err := c.getRuleCMs(p.Namespace, p.Spec.RuleSelector)
	if err != nil {
		return err
	}

	configMapNames := make([]string, 0, len(configMaps))
	for _, cm := range configMaps {
		configMapNames = append(configMapNames, cm.Name)
	}

	level.Debug(c.logger).Log(
		"msg", "selected rule configmaps for migration",
		"configmaps", strings.Join(configMapNames, ","),
		"namespace", p.Namespace,
		"prometheus", p.Name,
	)

	ruleFiles := []monitoringv1.RuleFile{}
	for _, cm := range configMaps {
		files, err := cmToRuleFiles(cm)
		if err != nil {
			return err
		}
		ruleFiles = append(ruleFiles, files...)
	}

	// Collect the names of the generated rule files (not of the source
	// ConfigMaps) so the log line below reports what is about to be created.
	ruleFileNames := make([]string, 0, len(ruleFiles))
	for i := range ruleFiles {
		ruleFileNames = append(ruleFileNames, ruleFiles[i].Name)
	}

	level.Debug(c.logger).Log(
		"msg", "rule files to be created",
		"rulefiles", strings.Join(ruleFileNames, ","),
		"namespace", p.Namespace,
		"prometheus", p.Name,
	)

	for i := range ruleFiles {
		// Index into the slice rather than taking the address of a range
		// variable, so every Create call receives its own element.
		ruleFile := &ruleFiles[i]

		_, err := c.mclient.MonitoringV1().RuleFiles(p.Namespace).Create(ruleFile)
		switch {
		case apierrors.IsAlreadyExists(err):
			level.Debug(c.logger).Log(
				"msg", "rule file already exists for configmap key",
				"rulefilename", ruleFile.Name,
				"namespace", p.Namespace,
				"prometheus", p.Name,
			)
		case err != nil:
			return err
		}
	}

	level.Debug(c.logger).Log(
		"msg", "rule files created successfully",
		"namespace", p.Namespace,
		"prometheus", p.Name,
	)

	return nil
}
// getRuleCMs returns the ConfigMaps in namespace ns that match
// cmLabelSelector, as listed from the indexer of c.cmapInf.
//
// It returns an error if the label selector cannot be converted into a
// selector or if listing from the indexer fails.
func (c *Operator) getRuleCMs(ns string, cmLabelSelector *metav1.LabelSelector) ([]*v1.ConfigMap, error) {
	cmSelector, err := metav1.LabelSelectorAsSelector(cmLabelSelector)
	if err != nil {
		return nil, errors.Wrap(err, "convert rule file label selector to selector")
	}

	configMaps := []*v1.ConfigMap{}
	err = cache.ListAllByNamespace(c.cmapInf.GetIndexer(), ns, cmSelector, func(obj interface{}) {
		configMaps = append(configMaps, obj.(*v1.ConfigMap))
	})
	// Surface listing failures instead of silently returning a partial or
	// empty result as if it were complete.
	if err != nil {
		return nil, errors.Wrap(err, "list rule configmaps")
	}

	return configMaps, nil
}
// cmToRuleFiles builds one RuleFile for every entry in cm.Data. The value of
// each entry is decoded as YAML or JSON into a RuleFileSpec; the resulting
// RuleFile is named "<configmap name>-<data key>" and carries the namespace
// and labels of the ConfigMap. The first entry that fails to decode aborts the
// conversion and yields an empty slice together with the wrapped error.
func cmToRuleFiles(cm *v1.ConfigMap) ([]monitoringv1.RuleFile, error) {
	converted := []monitoringv1.RuleFile{}

	for key, raw := range cm.Data {
		var spec monitoringv1.RuleFileSpec

		decoder := yaml.NewYAMLOrJSONDecoder(bytes.NewBufferString(raw), 1000)
		err := decoder.Decode(&spec)
		if err != nil {
			return []monitoringv1.RuleFile{}, errors.Wrapf(
				err,
				"unmarshal rules file %v in configmap '%v' in namespace '%v'",
				key, cm.Name, cm.Namespace,
			)
		}

		meta := metav1.ObjectMeta{
			Name:      cm.Name + "-" + key,
			Namespace: cm.Namespace,
			Labels:    cm.Labels,
		}
		converted = append(converted, monitoringv1.RuleFile{
			ObjectMeta: meta,
			Spec:       spec,
		})
	}

	return converted, nil
}

View file

@ -0,0 +1,61 @@
// Copyright 2016 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package prometheus
import (
"fmt"
"os"
"path/filepath"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/yaml"
"github.com/pkg/errors"
)
// TestCMToRuleFiles checks that the kube-prometheus rules ConfigMap manifest
// can be parsed and then converted into rule files without an error.
func TestCMToRuleFiles(t *testing.T) {
	const manifestPath = "../../contrib/kube-prometheus/manifests/prometheus-rules.yaml"

	cm, err := parseConfigMapYaml(manifestPath)
	if err != nil {
		t.Fatal(err)
	}

	if _, err := cmToRuleFiles(cm); err != nil {
		t.Fatal(err)
	}
}
// parseConfigMapYaml opens the manifest at relativePath (resolved to an
// absolute path first) and decodes its first YAML or JSON document into a
// Kubernetes ConfigMap.
//
// It returns an error if the path cannot be made absolute, the file cannot be
// opened, or the content cannot be decoded.
func parseConfigMapYaml(relativePath string) (*v1.ConfigMap, error) {
	absolutePath, err := filepath.Abs(relativePath)
	if err != nil {
		return nil, errors.Wrap(err, fmt.Sprintf("failed generate absolut file path of %s", relativePath))
	}

	manifest, err := os.Open(absolutePath)
	if err != nil {
		return nil, errors.Wrap(err, fmt.Sprintf("failed to open file %s", absolutePath))
	}
	// The file is only read, so a Close error carries no information worth
	// reporting; closing is still required to release the descriptor.
	defer manifest.Close()

	configMap := v1.ConfigMap{}
	if err := yaml.NewYAMLOrJSONDecoder(manifest, 100).Decode(&configMap); err != nil {
		return nil, err
	}

	return &configMap, nil
}

View file

@ -27,9 +27,9 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/cache"
"github.com/ghodss/yaml"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"gopkg.in/yaml.v2"
)
func (c *Operator) createOrUpdateRuleFileConfigMap(p *monitoringv1.Prometheus) error {
@ -158,7 +158,7 @@ func (c *Operator) selectRuleFiles(p *monitoringv1.Prometheus, namespaces []stri
// sort ruleFiles map
filenames := []string{}
for k, _ := range ruleFiles {
for k := range ruleFiles {
filenames = append(filenames, k)
}
sort.Strings(filenames)
@ -208,5 +208,5 @@ func checksumRuleFiles(files map[string]string) string {
}
func prometheusRuleFilesConfigMapName(prometheusName string) string {
return "prometheus-" + prometheusName + "-rules"
return "prometheus-" + prometheusName + "-rulefiles"
}

View file

@ -515,6 +515,66 @@ func TestPrometheusDeprecatedRuleSelectorField(t *testing.T) {
}
}
// TestPrometheusRuleConfigMapMigration verifies the migration of rule
// ConfigMaps to RuleFile objects: it creates a ConfigMap holding one alerting
// rule, starts a Prometheus that selects it through the old 'RuleSelector'
// field only, and then waits for both the corresponding RuleFile to appear
// and the alert to fire.
func TestPrometheusRuleConfigMapMigration(t *testing.T) {
	t.Parallel()

	ctx := framework.NewTestCtx(t)
	defer ctx.Cleanup(t)
	ns := ctx.CreateNamespace(t, framework.KubeClient)
	ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)

	name := "my-prometheus"
	ruleFileName := "my-alerting-rule-file"
	alertName := "ExampleAlert"

	// The raw string is flush-left on purpose: its indentation is part of the
	// YAML document and determines that the rule belongs to the group.
	cm := v1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name: "old-rule-file",
			Labels: map[string]string{
				"role": "rulefile",
			},
		},
		Data: map[string]string{
			ruleFileName: fmt.Sprintf(`
groups:
- name: ./alerting.rules
  rules:
  - alert: %v
    expr: vector(1)
`, alertName),
		},
	}

	// Fail right away if the fixture cannot be created; otherwise the test
	// would only time out later while waiting for the rule file.
	if _, err := framework.KubeClient.CoreV1().ConfigMaps(ns).Create(&cm); err != nil {
		t.Fatal(errors.Wrap(err, "creating rule config map failed"))
	}

	p := framework.MakeBasicPrometheus(ns, name, name, 1)
	// Reset new 'RuleFileSelector' field
	p.Spec.RuleFileSelector = nil
	// Specify old 'RuleSelector' field
	p.Spec.RuleSelector = &metav1.LabelSelector{
		MatchLabels: map[string]string{
			"role": "rulefile",
		},
	}
	if err := framework.CreatePrometheusAndWaitUntilReady(ns, p); err != nil {
		t.Fatal(err)
	}

	pSVC := framework.MakePrometheusService(p.Name, "not-relevant", v1.ServiceTypeClusterIP)
	finalizerFn, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, pSVC)
	if err != nil {
		t.Fatal(errors.Wrap(err, "creating Prometheus service failed"))
	}
	ctx.AddFinalizerFn(finalizerFn)

	// The migrated rule file is expected under "<configmap name>-<data key>".
	if err := framework.WaitForRuleFile(ns, cm.Name+"-"+ruleFileName); err != nil {
		t.Fatalf("waiting for rule config map to be converted to rule file crd: %v", err)
	}

	if err := framework.WaitForPrometheusFiringAlert(ns, pSVC.Name, alertName); err != nil {
		t.Fatal(err)
	}
}
func TestPrometheusMultipleRuleFilesSameNS(t *testing.T) {
t.Parallel()
@ -649,7 +709,7 @@ func TestPrometheusOnlyUpdatedOnRelevantChanges(t *testing.T) {
KubeClient.
CoreV1().
ConfigMaps(ns).
Get("prometheus-"+prometheusName+"-rules", metav1.GetOptions{})
Get("prometheus-"+prometheusName+"-rulefiles", metav1.GetOptions{})
},
MaxExpectedChanges: 1,
},
@ -766,7 +826,7 @@ func TestPrometheusWhenDeleteCRDCleanUpViaOwnerReference(t *testing.T) {
t.Fatal(err)
}
configMapName := fmt.Sprintf("prometheus-%v-rules", p.Name)
configMapName := fmt.Sprintf("prometheus-%v-rulefiles", p.Name)
_, err := framework.WaitForConfigMapExist(ns, configMapName)
if err != nil {

View file

@ -16,9 +16,12 @@ package framework
import (
"fmt"
"time"
monitoringv1 "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
)
func (f *Framework) MakeBasicRuleFile(ns, name string, groups []monitoringv1.RuleGroup) monitoringv1.RuleFile {
@ -67,6 +70,20 @@ func (f *Framework) MakeAndCreateFiringRuleFile(ns, name, alertName string) (mon
return file, nil
}
// WaitForRuleFile polls at one-second intervals, for at most
// f.DefaultTimeout, until a rule file called name can be fetched from
// namespace ns. A "not found" response keeps the poll going; any other error
// stops it and is returned.
func (f *Framework) WaitForRuleFile(ns, name string) error {
	ruleFileExists := func() (bool, error) {
		_, err := f.MonClientV1.RuleFiles(ns).Get(name, metav1.GetOptions{})
		switch {
		case err == nil:
			return true, nil
		case apierrors.IsNotFound(err):
			return false, nil
		default:
			return false, err
		}
	}

	return wait.Poll(time.Second, f.DefaultTimeout, ruleFileExists)
}
func (f *Framework) UpdateRuleFile(ns string, ar monitoringv1.RuleFile) error {
_, err := f.MonClientV1.RuleFiles(ns).Update(&ar)
if err != nil {