1
0
Fork 0
mirror of https://github.com/prometheus-operator/prometheus-operator.git synced 2025-04-16 09:16:38 +00:00
prometheus-operator/test/framework/alertmanager.go

484 lines
13 KiB
Go

// Copyright 2016 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package framework
import (
"context"
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"time"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/util/yaml"
"github.com/prometheus-operator/prometheus-operator/pkg/alertmanager"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/pkg/errors"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/textparse"
)
// ValidAlertmanagerConfig is an Alertmanager configuration document (YAML)
// kept as a package-level string. It sets a global resolve timeout, a route
// grouped by the 'job' label whose receiver is 'webhook', and a single
// receiver named 'webhook' that posts to http://alertmanagerwh:30500/.
var ValidAlertmanagerConfig = `global:
resolve_timeout: 5m
route:
group_by: ['job']
group_wait: 30s
group_interval: 5m
repeat_interval: 12h
receiver: 'webhook'
receivers:
- name: 'webhook'
webhook_configs:
- url: 'http://alertmanagerwh:30500/'
`
// MakeBasicAlertmanager builds an in-memory Alertmanager object with the
// given name and replica count and with its log level set to "debug".
// Nothing is sent to the API server; the caller decides what to do with the
// returned object.
func (f *Framework) MakeBasicAlertmanager(name string, replicas int32) *monitoringv1.Alertmanager {
	meta := metav1.ObjectMeta{Name: name}

	// replicas is a by-value parameter, so taking its address yields a
	// pointer owned exclusively by the returned object.
	spec := monitoringv1.AlertmanagerSpec{
		Replicas: &replicas,
		LogLevel: "debug",
	}

	return &monitoringv1.Alertmanager{ObjectMeta: meta, Spec: spec}
}
// MakeAlertmanagerService builds an in-memory Service object named
// "alertmanager-<name>" of the requested service type. The Service carries a
// "group" label set to group, exposes port 9093 under the name "web"
// (targeting the container port named "web"), and selects pods labelled
// alertmanager=<name>.
func (f *Framework) MakeAlertmanagerService(name, group string, serviceType v1.ServiceType) *v1.Service {
	webPort := v1.ServicePort{
		Name:       "web",
		Port:       9093,
		TargetPort: intstr.FromString("web"),
	}

	meta := metav1.ObjectMeta{
		Name:   fmt.Sprintf("alertmanager-%s", name),
		Labels: map[string]string{"group": group},
	}

	spec := v1.ServiceSpec{
		Type:     serviceType,
		Ports:    []v1.ServicePort{webPort},
		Selector: map[string]string{"alertmanager": name},
	}

	return &v1.Service{ObjectMeta: meta, Spec: spec}
}
// SecretFromYaml opens the manifest at filepath and decodes it (YAML or JSON)
// into a Secret.
//
// It returns the error from opening the file or from decoding it; in both
// cases the returned Secret is nil. The file handle is always closed before
// the function returns.
func (f *Framework) SecretFromYaml(filepath string) (*v1.Secret, error) {
	manifest, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}
	// Release the descriptor on every return path. The file is only read, so
	// an error from Close carries no information worth surfacing.
	defer manifest.Close()

	var s v1.Secret
	// 100 is the buffer size handed to the YAML-or-JSON decoder.
	if err := yaml.NewYAMLOrJSONDecoder(manifest, 100).Decode(&s); err != nil {
		return nil, err
	}
	return &s, nil
}
// AlertmanagerConfigSecret loads the Secret manifest shipped with the test
// framework and returns it with its name and namespace overridden by the
// given values. The manifest path is relative, so the result depends on the
// working directory of the calling process.
func (f *Framework) AlertmanagerConfigSecret(ns, name string) (*v1.Secret, error) {
	const manifestPath = "../../test/framework/ressources/alertmanager-main-secret.yaml"

	secret, err := f.SecretFromYaml(manifestPath)
	if err != nil {
		return nil, err
	}

	secret.Name, secret.Namespace = name, ns
	return secret, nil
}
// CreateAlertmanagerAndWaitUntilReady creates the configuration Secret
// "alertmanager-<a.Name>" in namespace ns, creates the Alertmanager object a,
// and then blocks until WaitForAlertmanagerReady reports the instances ready.
//
// On success it returns the object as returned by the API server. If creating
// the Secret or the Alertmanager fails, it returns nil and a wrapped error.
// If only the readiness wait fails, the created object is returned together
// with that error.
//
// a.Spec.Replicas of the created object is dereferenced and must be non-nil.
func (f *Framework) CreateAlertmanagerAndWaitUntilReady(ns string, a *monitoringv1.Alertmanager) (*monitoringv1.Alertmanager, error) {
	// Capture the requested name up front: the value returned by a failed
	// Create call must not be used to build the error message.
	amName := a.Name
	amConfigSecretName := fmt.Sprintf("alertmanager-%s", amName)

	s, err := f.AlertmanagerConfigSecret(ns, amConfigSecretName)
	if err != nil {
		return nil, errors.Wrapf(err, "making alertmanager config secret %v failed", amConfigSecretName)
	}

	if _, err := f.KubeClient.CoreV1().Secrets(ns).Create(context.TODO(), s, metav1.CreateOptions{}); err != nil {
		return nil, errors.Wrapf(err, "creating alertmanager config secret %v failed", s.Name)
	}

	created, err := f.MonClientV1.Alertmanagers(ns).Create(context.TODO(), a, metav1.CreateOptions{})
	if err != nil {
		return nil, errors.Wrapf(err, "creating alertmanager %v failed", amName)
	}

	return created, f.WaitForAlertmanagerReady(ns, created.Name, int(*created.Spec.Replicas), created.Spec.ForceEnableClusterMode)
}
// WaitForAlertmanagerReady waits for each individual pod as well as the
// cluster as a whole to be ready.
//
// It first waits up to five minutes for replicas pods matching
// alertmanager.ListOptions(name) to become ready, then calls
// WaitForAlertmanagerInitialized for every pod "alertmanager-<name>-<i>" with
// i in [0, replicas). The first failure is returned as a wrapped error that
// names the cluster and, for the per-pod phase, the pod that failed.
func (f *Framework) WaitForAlertmanagerReady(ns, name string, replicas int, forceEnableClusterMode bool) error {
	if err := WaitForPodsReady(
		f.KubeClient,
		ns,
		5*time.Minute,
		replicas,
		alertmanager.ListOptions(name),
	); err != nil {
		return errors.Wrapf(err,
			"failed to wait for an Alertmanager cluster (%s) with %d instances to become ready",
			name, replicas,
		)
	}

	for i := 0; i < replicas; i++ {
		// Use a distinct identifier for the pod so the cluster name stays
		// available for the error message below.
		podName := fmt.Sprintf("alertmanager-%v-%v", name, strconv.Itoa(i))
		if err := f.WaitForAlertmanagerInitialized(ns, podName, replicas, forceEnableClusterMode); err != nil {
			return errors.Wrapf(err,
				"failed to wait for Alertmanager pod (%s) of cluster (%s) with %d instances to become initialized",
				podName, name, replicas,
			)
		}
	}

	return nil
}
// UpdateAlertmanagerAndWaitUntilReady submits a as an update in namespace ns
// and then waits up to five minutes for the number of pods given by the
// updated object's Spec.Replicas to become ready.
//
// It returns the updated object on success. If the update call or the
// readiness wait fails, it returns nil and an error.
//
// Spec.Replicas of the updated object is dereferenced and must be non-nil.
func (f *Framework) UpdateAlertmanagerAndWaitUntilReady(ns string, a *monitoringv1.Alertmanager) (*monitoringv1.Alertmanager, error) {
	updated, err := f.MonClientV1.Alertmanagers(ns).Update(context.TODO(), a, metav1.UpdateOptions{})
	if err != nil {
		return nil, err
	}

	// Dereference once so that both the wait and the error message use the
	// replica count itself rather than the pointer.
	replicas := int(*updated.Spec.Replicas)

	err = WaitForPodsReady(
		f.KubeClient,
		ns,
		5*time.Minute,
		replicas,
		alertmanager.ListOptions(updated.Name),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to update %d Alertmanager instances (%s): %v", replicas, updated.Name, err)
	}

	return updated, nil
}
// DeleteAlertmanagerAndWaitUntilGone removes the Alertmanager object called
// name from namespace ns and cleans up after it. The steps are, in order:
// fetch the object (fails if it cannot be retrieved), delete it, wait up to
// f.DefaultTimeout for the number of ready pods matching
// alertmanager.ListOptions(name) to reach zero, and finally delete the Secret
// "alertmanager-<name>". The first failing step's error is returned.
func (f *Framework) DeleteAlertmanagerAndWaitUntilGone(ns, name string) error {
	amClient := f.MonClientV1.Alertmanagers(ns)

	if _, err := amClient.Get(context.TODO(), name, metav1.GetOptions{}); err != nil {
		return errors.Wrapf(err, "requesting Alertmanager tpr %v failed", name)
	}

	if err := amClient.Delete(context.TODO(), name, metav1.DeleteOptions{}); err != nil {
		return errors.Wrapf(err, "deleting Alertmanager tpr %v failed", name)
	}

	// Waiting for zero ready pods is how this helper detects that the
	// instances have gone away.
	err := WaitForPodsReady(f.KubeClient, ns, f.DefaultTimeout, 0, alertmanager.ListOptions(name))
	if err != nil {
		return errors.Wrapf(err, "waiting for Alertmanager tpr (%s) to vanish timed out", name)
	}

	secretName := fmt.Sprintf("alertmanager-%s", name)
	return f.KubeClient.CoreV1().Secrets(ns).Delete(context.TODO(), secretName, metav1.DeleteOptions{})
}
// amImage returns the quay.io Alertmanager container image reference for the
// given version tag. The version is inserted verbatim; no validation is done.
func amImage(version string) string {
	const imageFormat = "quay.io/prometheus/alertmanager:%s"

	return fmt.Sprintf(imageFormat, version)
}
// WaitForAlertmanagerInitialized polls the status endpoint of the pod called
// name once per second, for up to five minutes, until the instance counts as
// initialized:
//
//   - when neither amountPeers > 1 nor forceEnableClusterMode holds, a status
//     of "success" is sufficient;
//   - otherwise the reported cluster status must list exactly amountPeers
//     peers.
//
// An error from fetching the status stops the polling immediately. The
// returned error combines the polling error with the most recent reason the
// check was not satisfied (which may be nil if none was recorded).
func (f *Framework) WaitForAlertmanagerInitialized(ns, name string, amountPeers int, forceEnableClusterMode bool) error {
	// De Morgan form of "not (amountPeers > 1 || forceEnableClusterMode)".
	standalone := amountPeers <= 1 && !forceEnableClusterMode

	// lastFailure remembers why the latest poll iteration was unsuccessful so
	// that a timeout can be reported with some context.
	var lastFailure error

	initialized := func() (bool, error) {
		status, err := f.GetAlertmanagerStatus(ns, name)
		if err != nil {
			return false, err
		}

		cluster := status.Data.ClusterStatus
		switch {
		case standalone && status.Status == "success":
			return true, nil
		case cluster == nil:
			lastFailure = fmt.Errorf("do not have a cluster status")
			return false, nil
		case len(cluster.Peers) == amountPeers:
			return true, nil
		}

		lastFailure = fmt.Errorf(
			"failed to get correct amount of peers, expected %d, got %d, addresses %v",
			amountPeers,
			len(cluster.Peers),
			cluster.Peers,
		)
		return false, nil
	}

	if err := wait.Poll(time.Second, time.Minute*5, initialized); err != nil {
		return fmt.Errorf("failed to wait for initialized alertmanager cluster: %v: %v", err, lastFailure)
	}
	return nil
}
// GetAlertmanagerStatus requests /api/v1/status from the pod n in namespace
// ns via ProxyGetPod and decodes the JSON response body.
//
// If the request fails, the zero-value response is returned with the error.
// If decoding fails, whatever was decoded so far is returned with the error.
func (f *Framework) GetAlertmanagerStatus(ns, n string) (amAPIStatusResp, error) {
	var status amAPIStatusResp

	body, err := ProxyGetPod(f.KubeClient, ns, n, "/api/v1/status").DoRaw(context.TODO())
	if err != nil {
		return status, err
	}

	// err is nil on success, so a single return covers both outcomes.
	err = json.Unmarshal(body, &status)
	return status, err
}
// GetAlertmanagerMetrics requests /metrics from the pod n in namespace ns via
// ProxyGetPod and returns a Prometheus text-format parser positioned over the
// raw response body. A failed request yields a nil parser and the error.
func (f *Framework) GetAlertmanagerMetrics(ns, n string) (textparse.Parser, error) {
	body, err := ProxyGetPod(f.KubeClient, ns, n, "/metrics").DoRaw(context.TODO())
	if err != nil {
		return nil, err
	}

	return textparse.NewPromParser(body), nil
}
// CreateSilence posts a fixed silence definition (matcher test=123, active
// from 2030-04-09 to 2031-04-09) to /api/v1/silences on the pod n in
// namespace ns and returns the silence ID reported in the response.
//
// An error is returned if the request fails, if the response is not valid
// JSON, or if the response status is anything other than "success".
func (f *Framework) CreateSilence(ns, n string) (string, error) {
	const silenceBody = `{"id":"","createdBy":"Max Mustermann","comment":"1234","startsAt":"2030-04-09T09:16:15.114Z","endsAt":"2031-04-09T11:16:15.114Z","matchers":[{"name":"test","value":"123","isRegex":false}]}`

	body, err := ProxyPostPod(f.KubeClient, ns, n, "/api/v1/silences", silenceBody).DoRaw(context.TODO())
	if err != nil {
		return "", err
	}

	var created amAPICreateSilResp
	if err := json.Unmarshal(body, &created); err != nil {
		return "", err
	}

	if status := created.Status; status != "success" {
		return "", errors.Errorf(
			"expected Alertmanager to return 'success', but got '%v' instead",
			status,
		)
	}

	return created.Data.SilenceID, nil
}
// alert represents an alert that can be posted to the /api/v1/alerts endpoint
// of an Alertmanager.
// Taken from github.com/prometheus/common/model/alert.go.Alert.
//
// Values of this type are JSON-encoded by SendAlertToAlertmanager; the struct
// tags below define the keys used on the wire.
type alert struct {
// Label value pairs for purpose of aggregation, matching, and disposition
// dispatching. This must minimally include an "alertname" label.
Labels map[string]string `json:"labels"`
// Extra key/value information which does not define alert identity.
Annotations map[string]string `json:"annotations"`
// The known time range for this alert. Both ends are optional.
// NOTE(review): encoding/json never treats a struct value as empty, so
// "omitempty" has no effect on these time.Time fields; a zero time is still
// written out rather than omitted.
StartsAt time.Time `json:"startsAt,omitempty"`
EndsAt time.Time `json:"endsAt,omitempty"`
// URL sent under the "generatorURL" key; presumably identifies the source
// that produced the alert — confirm against the upstream type.
GeneratorURL string `json:"generatorURL"`
}
// SendAlertToAlertmanager posts a single example alert (alertname
// "ExampleAlert", prometheus "my-prometheus") with the given start time to
// the api/v1/alerts endpoint. ns and n are forwarded to ProxyPostPod to
// select the namespace and the target to send the request to.
//
// An error is returned if the alert cannot be encoded, if the request fails,
// if the response is not valid JSON, or if the response status is anything
// other than "success".
func (f *Framework) SendAlertToAlertmanager(ns, n string, start time.Time) error {
	example := &alert{
		Labels: map[string]string{
			"alertname":  "ExampleAlert",
			"prometheus": "my-prometheus",
		},
		Annotations:  map[string]string{},
		StartsAt:     start,
		GeneratorURL: "http://prometheus-test-0:9090/graph?g0.expr=vector%281%29\u0026g0.tab=1",
	}

	// The endpoint is given a JSON array, hence the one-element slice.
	payload, err := json.Marshal([]*alert{example})
	if err != nil {
		return err
	}

	body, err := ProxyPostPod(f.KubeClient, ns, n, "api/v1/alerts", string(payload)).DoRaw(context.TODO())
	if err != nil {
		return err
	}

	var posted amAPIPostAlertResp
	if err := json.Unmarshal(body, &posted); err != nil {
		return err
	}

	if posted.Status == "success" {
		return nil
	}
	return errors.Errorf("expected Alertmanager to return 'success' but got %q instead", posted.Status)
}
// GetSilences requests /api/v1/silences from the pod n in namespace ns via
// ProxyGetPod and returns the silences listed in the response.
//
// An error is returned if the request fails, if the response is not valid
// JSON, or if the response status is anything other than "success". On every
// error path the data decoded so far (possibly nil) is returned alongside the
// error.
func (f *Framework) GetSilences(ns, n string) ([]amAPISil, error) {
	var listed amAPIGetSilResp

	body, err := ProxyGetPod(f.KubeClient, ns, n, "/api/v1/silences").DoRaw(context.TODO())
	if err != nil {
		return listed.Data, err
	}

	if err = json.Unmarshal(body, &listed); err != nil {
		return listed.Data, err
	}

	if listed.Status == "success" {
		return listed.Data, nil
	}
	return listed.Data, errors.Errorf(
		"expected Alertmanager to return 'success', but got '%v' instead",
		listed.Status,
	)
}
// WaitForAlertmanagerConfigToContainString polls the status endpoint of pod
// "alertmanager-<amName>-0" every ten seconds, for up to five minutes, until
// the configuration reported there contains expectedString.
//
// An error from fetching the status stops the polling immediately. Any
// polling error, including a timeout, is returned wrapped in a message that
// quotes expectedString.
func (f *Framework) WaitForAlertmanagerConfigToContainString(ns, amName, expectedString string) error {
	containsExpected := func() (bool, error) {
		status, err := f.GetAlertmanagerStatus(ns, "alertmanager-"+amName+"-0")
		if err != nil {
			return false, err
		}
		return strings.Contains(status.Data.ConfigYAML, expectedString), nil
	}

	if err := wait.Poll(10*time.Second, time.Minute*5, containsExpected); err != nil {
		return fmt.Errorf("failed to wait for alertmanager config to contain %q: %v", expectedString, err)
	}
	return nil
}
// WaitForAlertmanagerConfigToBeReloaded polls the metrics of pod
// "alertmanager-<amName>-0" every ten seconds, for up to five minutes, until
// the alertmanager_config_last_reload_success_timestamp_seconds series
// carries a value later than previousReloadTimestamp. The value is read as
// whole Unix seconds.
//
// Polling stops with an error as soon as the metrics cannot be fetched, the
// parser reports an error (including reaching the end of the input without
// finding the series), or an invalid entry is encountered.
func (f *Framework) WaitForAlertmanagerConfigToBeReloaded(ns, amName string, previousReloadTimestamp time.Time) error {
	const configReloadMetricName = "alertmanager_config_last_reload_success_timestamp_seconds"

	reloaded := func() (bool, error) {
		metrics, err := f.GetAlertmanagerMetrics(ns, "alertmanager-"+amName+"-0")
		if err != nil {
			return false, err
		}

		for {
			entryType, err := metrics.Next()
			if err != nil {
				return false, err
			}

			switch entryType {
			case textparse.EntryInvalid:
				return false, fmt.Errorf("invalid prometheus metric entry")
			case textparse.EntrySeries:
				// Only series entries carry a sample; inspect it below.
			default:
				continue
			}

			seriesLabels := labels.Labels{}
			metrics.Metric(&seriesLabels)
			if seriesLabels.Get("__name__") != configReloadMetricName {
				continue
			}

			// The third value returned by Series is used as the reload time
			// in seconds; any fractional part is truncated.
			_, _, reloadSec := metrics.Series()
			lastReload := time.Unix(int64(reloadSec), 0)
			return lastReload.After(previousReloadTimestamp), nil
		}
	}

	if err := wait.Poll(10*time.Second, time.Minute*5, reloaded); err != nil {
		return fmt.Errorf("failed to wait for alertmanager config to have been reloaded after %v: %v", previousReloadTimestamp, err)
	}
	return nil
}
// amAPICreateSilResp is the JSON envelope that CreateSilence decodes from the
// response to its POST against /api/v1/silences.
type amAPICreateSilResp struct {
// Status is compared against "success" by CreateSilence.
Status string `json:"status"`
// Data holds the payload of the response (the ID of the new silence).
Data amAPICreateSilData `json:"data"`
}
// amAPIPostAlertResp is the JSON envelope that SendAlertToAlertmanager
// decodes from the response to its POST against api/v1/alerts. Only the
// status field is read.
type amAPIPostAlertResp struct {
// Status is compared against "success" by SendAlertToAlertmanager.
Status string `json:"status"`
}
// amAPICreateSilData is the "data" member of amAPICreateSilResp.
type amAPICreateSilData struct {
// SilenceID is decoded from the "silenceId" key and is the value that
// CreateSilence returns to its caller.
SilenceID string `json:"silenceId"`
}
// amAPIGetSilResp is the JSON envelope that GetSilences decodes from the
// response to its GET against /api/v1/silences.
type amAPIGetSilResp struct {
// Status is compared against "success" by GetSilences.
Status string `json:"status"`
// Data is the list of silences decoded from the "data" key.
Data []amAPISil `json:"data"`
}
// amAPISil is one entry of the silence list in amAPIGetSilResp. Only the
// fields declared here are decoded; other keys in the JSON are ignored.
type amAPISil struct {
// ID is decoded from the "id" key.
ID string `json:"id"`
// CreatedBy is decoded from the "createdBy" key.
CreatedBy string `json:"createdBy"`
}
// amAPIStatusResp is the JSON envelope that GetAlertmanagerStatus decodes
// from the response to its GET against /api/v1/status.
type amAPIStatusResp struct {
// Status is compared against "success" by WaitForAlertmanagerInitialized.
Status string `json:"status"`
// Data holds the cluster status and the configuration text.
Data amAPIStatusData `json:"data"`
}
// amAPIStatusData is the "data" member of amAPIStatusResp.
type amAPIStatusData struct {
// ClusterStatus stays nil when the response has no "clusterStatus" key (or
// it is null); WaitForAlertmanagerInitialized checks for that case.
ClusterStatus *clusterStatus `json:"clusterStatus,omitempty"`
// ConfigYAML is decoded from the "configYAML" key and is the text searched
// by WaitForAlertmanagerConfigToContainString.
ConfigYAML string `json:"configYAML"`
}
// clusterPeer is one entry of clusterStatus.Peers as decoded from the status
// response.
type clusterPeer struct {
// Name is decoded from the "name" key.
Name string `json:"name"`
// Address is decoded from the "address" key.
Address string `json:"address"`
}
// clusterStatus is the "clusterStatus" member of the status response.
type clusterStatus struct {
// Peers is decoded from the "peers" key; its length is what
// WaitForAlertmanagerInitialized compares against the expected peer count.
Peers []clusterPeer `json:"peers"`
}