1
0
Fork 0
mirror of https://github.com/prometheus-operator/prometheus-operator.git synced 2025-04-16 01:06:27 +00:00
prometheus-operator/test/e2e/alertmanager_test.go
2018-06-05 13:34:13 +02:00

495 lines
14 KiB
Go

// Copyright 2016 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package e2e
import (
"fmt"
"log"
"strconv"
"strings"
"testing"
"time"
"github.com/pkg/errors"
appsv1 "k8s.io/api/apps/v1beta2"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
testFramework "github.com/coreos/prometheus-operator/test/framework"
)
// TestAlertmanagerCreateDeleteCluster verifies that a three-replica
// Alertmanager cluster can be created and then deleted cleanly.
func TestAlertmanagerCreateDeleteCluster(t *testing.T) {
	t.Parallel()

	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	namespace := testCtx.CreateNamespace(t, framework.KubeClient)
	testCtx.SetupPrometheusRBAC(t, namespace, framework.KubeClient)

	amName := "test"

	err := framework.CreateAlertmanagerAndWaitUntilReady(namespace, framework.MakeBasicAlertmanager(amName, 3))
	if err != nil {
		t.Fatal(err)
	}

	err = framework.DeleteAlertmanagerAndWaitUntilGone(namespace, amName)
	if err != nil {
		t.Fatal(err)
	}
}
// TestAlertmanagerScaling scales an Alertmanager cluster from 3 replicas up
// to 5 and back down to 3, waiting for readiness after each resize.
func TestAlertmanagerScaling(t *testing.T) {
	t.Parallel()

	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	namespace := testCtx.CreateNamespace(t, framework.KubeClient)
	testCtx.SetupPrometheusRBAC(t, namespace, framework.KubeClient)

	amName := "test"

	if err := framework.CreateAlertmanagerAndWaitUntilReady(namespace, framework.MakeBasicAlertmanager(amName, 3)); err != nil {
		t.Fatal(err)
	}

	// Scale up, then back down to the original size.
	for _, replicas := range []int32{5, 3} {
		if err := framework.UpdateAlertmanagerAndWaitUntilReady(namespace, framework.MakeBasicAlertmanager(amName, replicas)); err != nil {
			t.Fatal(err)
		}
	}
}
// TestAlertmanagerVersionMigration upgrades a single-replica Alertmanager
// from v0.14.0 to v0.15.0-rc.1 and then downgrades it back, waiting for the
// instance to become ready after every version change.
func TestAlertmanagerVersionMigration(t *testing.T) {
	t.Parallel()

	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	namespace := testCtx.CreateNamespace(t, framework.KubeClient)
	testCtx.SetupPrometheusRBAC(t, namespace, framework.KubeClient)

	am := framework.MakeBasicAlertmanager("test", 1)
	am.Spec.Version = "v0.14.0"
	if err := framework.CreateAlertmanagerAndWaitUntilReady(namespace, am); err != nil {
		t.Fatal(err)
	}

	// Upgrade, then downgrade again.
	for _, version := range []string{"v0.15.0-rc.1", "v0.14.0"} {
		am.Spec.Version = version
		if err := framework.UpdateAlertmanagerAndWaitUntilReady(namespace, am); err != nil {
			t.Fatal(err)
		}
	}
}
// TestExposingAlertmanagerWithKubernetesAPI exposes an Alertmanager through a
// ClusterIP service and checks it is reachable via the API server's service
// proxy.
func TestExposingAlertmanagerWithKubernetesAPI(t *testing.T) {
	t.Parallel()

	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	namespace := testCtx.CreateNamespace(t, framework.KubeClient)
	testCtx.SetupPrometheusRBAC(t, namespace, framework.KubeClient)

	am := framework.MakeBasicAlertmanager("test-alertmanager", 1)
	amService := framework.MakeAlertmanagerService(am.Name, "alertmanager-service", v1.ServiceTypeClusterIP)

	if err := framework.CreateAlertmanagerAndWaitUntilReady(namespace, am); err != nil {
		t.Fatal(err)
	}

	if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, namespace, amService); err != nil {
		t.Fatal(err)
	}

	// Request the web UI root through the API server service proxy.
	request := framework.KubeClient.CoreV1().Services(namespace).ProxyGet("", amService.Name, "web", "/", map[string]string{})
	if _, err := request.DoRaw(); err != nil {
		t.Fatal(err)
	}
}
// TestMeshInitialization ensures every member of an Alertmanager cluster
// joins the HA mesh, for both the pre-0.15.0 mesh implementation and the
// memberlist implementation introduced in v0.15.0.
func TestMeshInitialization(t *testing.T) {
	t.Parallel()

	// Starting with Alertmanager v0.15.0 hashicorp/memberlist is used for HA.
	// Make sure both memberlist as well as mesh (< 0.15.0) work.
	for _, v := range []string{"v0.14.0", "v0.15.0-rc.1"} {
		version := v // shadow for the parallel subtest closure
		subtestName := fmt.Sprintf("amVersion%v", strings.Replace(version, ".", "-", -1))
		t.Run(subtestName, func(t *testing.T) {
			t.Parallel()

			testCtx := framework.NewTestCtx(t)
			defer testCtx.Cleanup(t)

			namespace := testCtx.CreateNamespace(t, framework.KubeClient)
			testCtx.SetupPrometheusRBAC(t, namespace, framework.KubeClient)

			clusterSize := 3
			am := framework.MakeBasicAlertmanager("test", int32(clusterSize))
			am.Spec.Version = version
			amService := framework.MakeAlertmanagerService(am.Name, "alertmanager-service", v1.ServiceTypeClusterIP)

			if err := framework.CreateAlertmanagerAndWaitUntilReady(namespace, am); err != nil {
				t.Fatal(err)
			}

			if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, namespace, amService); err != nil {
				t.Fatal(err)
			}

			// Every pod in the stateful set must report the full mesh size.
			for i := 0; i < clusterSize; i++ {
				podName := "alertmanager-" + am.Name + "-" + strconv.Itoa(i)
				if err := framework.WaitForAlertmanagerInitializedMesh(namespace, podName, clusterSize); err != nil {
					t.Fatal(err)
				}
			}
		})
	}
}
// TestAlertmanagerClusterGossipSilences creates a silence on one member of a
// three-replica Alertmanager cluster and verifies the silence is gossiped to
// every other member.
func TestAlertmanagerClusterGossipSilences(t *testing.T) {
	t.Parallel()

	ctx := framework.NewTestCtx(t)
	defer ctx.Cleanup(t)

	ns := ctx.CreateNamespace(t, framework.KubeClient)
	ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)

	amClusterSize := 3
	alertmanager := framework.MakeBasicAlertmanager("test", int32(amClusterSize))
	alertmanager.Spec.Version = "v0.15.0-rc.1"

	if err := framework.CreateAlertmanagerAndWaitUntilReady(ns, alertmanager); err != nil {
		t.Fatal(err)
	}

	// All members must have joined the mesh before silences can propagate.
	for i := 0; i < amClusterSize; i++ {
		name := "alertmanager-" + alertmanager.Name + "-" + strconv.Itoa(i)
		if err := framework.WaitForAlertmanagerInitializedMesh(ns, name, amClusterSize); err != nil {
			t.Fatal(err)
		}
	}

	silID, err := framework.CreateSilence(ns, "alertmanager-test-0")
	if err != nil {
		t.Fatalf("failed to create silence: %v", err)
	}

	// Poll each member until the created silence shows up there.
	for i := 0; i < amClusterSize; i++ {
		err = wait.Poll(time.Second, framework.DefaultTimeout, func() (bool, error) {
			silences, err := framework.GetSilences(ns, "alertmanager-"+alertmanager.Name+"-"+strconv.Itoa(i))
			if err != nil {
				return false, err
			}
			if len(silences) != 1 {
				// Not gossiped to this member yet; keep polling.
				return false, nil
			}
			if silences[0].ID != silID {
				return false, errors.Errorf("expected silence id on alertmanager %v to match id of created silence '%v' but got %v", i, silID, silences[0].ID)
			}
			return true, nil
		})
		if err != nil {
			t.Fatalf("could not retrieve created silence on alertmanager %v: %v", i, err)
		}
	}
}
// TestAlertmanagerReloadConfig swaps the Alertmanager configuration secret
// from one webhook receiver to another and verifies that the running
// Alertmanager picks up each configuration in turn.
func TestAlertmanagerReloadConfig(t *testing.T) {
	t.Parallel()

	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	namespace := testCtx.CreateNamespace(t, framework.KubeClient)
	testCtx.SetupPrometheusRBAC(t, namespace, framework.KubeClient)

	am := framework.MakeBasicAlertmanager("reload-config", 1)

	firstConfig := `
global:
  resolve_timeout: 5m
route:
  group_by: ['job']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 12h
  receiver: 'webhook'
receivers:
- name: 'webhook'
  webhook_configs:
  - url: 'http://firstConfigWebHook:30500/'
`
	secondConfig := `
global:
  resolve_timeout: 5m
route:
  group_by: ['job']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 12h
  receiver: 'webhook'
receivers:
- name: 'webhook'
  webhook_configs:
  - url: 'http://secondConfigWebHook:30500/'
`

	configSecret := &v1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name: fmt.Sprintf("alertmanager-%s", am.Name),
		},
		Data: map[string][]byte{
			"alertmanager.yaml": []byte(firstConfig),
		},
	}

	if err := framework.CreateAlertmanagerAndWaitUntilReady(namespace, am); err != nil {
		t.Fatal(err)
	}

	// Replace the generated secret with the first config and wait for it to
	// be loaded.
	if _, err := framework.KubeClient.CoreV1().Secrets(namespace).Update(configSecret); err != nil {
		t.Fatal(err)
	}
	log.Println("waiting for first expected config")
	if err := framework.WaitForAlertmanagerConfigToContainString(namespace, am.Name, "firstConfigWebHook"); err != nil {
		t.Fatal(err)
	}
	log.Println("first expected config found")

	// Switch to the second config and wait for the reload.
	configSecret.Data["alertmanager.yaml"] = []byte(secondConfig)
	if _, err := framework.KubeClient.CoreV1().Secrets(namespace).Update(configSecret); err != nil {
		t.Fatal(err)
	}
	log.Println("waiting for second expected config")
	if err := framework.WaitForAlertmanagerConfigToContainString(namespace, am.Name, "secondConfigWebHook"); err != nil {
		t.Fatal(err)
	}
	log.Println("second expected config found")
}
// TestAlertmanagerZeroDowntimeRollingDeployment checks that a rolling version
// update of a 3-replica Alertmanager cluster causes no duplicate alert
// notifications: a webhook receiver deployment counts notifications before
// and after the rolling update, and the count must not change.
func TestAlertmanagerZeroDowntimeRollingDeployment(t *testing.T) {
	t.Parallel()

	ctx := framework.NewTestCtx(t)
	defer ctx.Cleanup(t)
	ns := ctx.CreateNamespace(t, framework.KubeClient)
	ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)

	alertName := "ExampleAlert"

	// Webhook receiver that logs every notification payload it receives.
	whReplicas := int32(1)
	whdpl := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name: "alertmanager-webhook",
		},
		Spec: appsv1.DeploymentSpec{
			Replicas: &whReplicas,
			Selector: &metav1.LabelSelector{
				MatchLabels: map[string]string{
					"app": "alertmanager-webhook",
				},
			},
			Template: v1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: map[string]string{
						"app": "alertmanager-webhook",
					},
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Name:  "webhook-server",
							Image: "quay.io/coreos/prometheus-alertmanager-test-webhook",
							Ports: []v1.ContainerPort{
								{
									Name:          "web",
									ContainerPort: 5001,
								},
							},
						},
					},
				},
			},
		},
	}
	whsvc := &v1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name: "alertmanager-webhook",
		},
		Spec: v1.ServiceSpec{
			Type: v1.ServiceTypeClusterIP,
			Ports: []v1.ServicePort{
				{
					Name:       "web",
					Port:       5001,
					TargetPort: intstr.FromString("web"),
				},
			},
			Selector: map[string]string{
				"app": "alertmanager-webhook",
			},
		},
	}
	if err := testFramework.CreateDeployment(framework.KubeClient, ns, whdpl); err != nil {
		t.Fatal(err)
	}
	if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, whsvc); err != nil {
		t.Fatal(err)
	}
	err := testFramework.WaitForPodsReady(framework.KubeClient, ns, time.Minute*5, 1,
		metav1.ListOptions{
			LabelSelector: fields.SelectorFromSet(fields.Set(map[string]string{
				"app": "alertmanager-webhook",
			})).String(),
		},
	)
	if err != nil {
		t.Fatal(err)
	}

	// Alertmanager cluster pointing at the webhook receiver. Grouping
	// intervals are short so the single firing alert notifies quickly but
	// repeats only after an hour — within the test window exactly one
	// notification is expected.
	alertmanager := framework.MakeBasicAlertmanager("rolling-deploy", 3)
	alertmanager.Spec.Version = "v0.13.0"
	amsvc := framework.MakeAlertmanagerService(alertmanager.Name, "test", v1.ServiceTypeClusterIP)
	amcfg := &v1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Name: fmt.Sprintf("alertmanager-%s", alertmanager.Name),
		},
		Data: map[string][]byte{
			"alertmanager.yaml": []byte(fmt.Sprintf(`
global:
  resolve_timeout: 5m
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'webhook'
receivers:
- name: 'webhook'
  webhook_configs:
  - url: 'http://%s.%s.svc:5001/'
inhibit_rules:
- source_match:
    severity: 'critical'
  target_match:
    severity: 'warning'
  equal: ['alertname', 'dev', 'instance']
`, whsvc.Name, ns)),
		},
	}

	if _, err := framework.KubeClient.CoreV1().Secrets(ns).Create(amcfg); err != nil {
		t.Fatal(err)
	}
	if _, err := framework.MonClientV1.Alertmanagers(ns).Create(alertmanager); err != nil {
		t.Fatal(err)
	}
	if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, amsvc); err != nil {
		t.Fatal(err)
	}

	// Prometheus with a rule that fires immediately, wired to the
	// Alertmanager cluster.
	p := framework.MakeBasicPrometheus(ns, "test", "test", 3)
	p.Spec.EvaluationInterval = "100ms"
	framework.AddAlertingToPrometheus(p, ns, alertmanager.Name)
	_, err = framework.MakeAndCreateFiringRule(ns, p.Name, alertName)
	if err != nil {
		t.Fatal(err)
	}
	if err := framework.CreatePrometheusAndWaitUntilReady(ns, p); err != nil {
		t.Fatal(err)
	}
	pSVC := framework.MakePrometheusService(p.Name, "not-relevant", v1.ServiceTypeClusterIP)
	if finalizerFn, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, pSVC); err != nil {
		t.Fatal(errors.Wrap(err, "creating Prometheus service failed"))
	} else {
		ctx.AddFinalizerFn(finalizerFn)
	}

	// The Prometheus config reloader reloads Prometheus periodically, not on
	// alert rule change. Thereby one has to wait for Prometheus actually firing
	// the alert.
	err = framework.WaitForPrometheusFiringAlert(p.Namespace, pSVC.Name, alertName)
	if err != nil {
		t.Fatal(err)
	}

	// Wait for alert to propagate
	time.Sleep(10 * time.Second)

	opts := metav1.ListOptions{
		LabelSelector: fields.SelectorFromSet(fields.Set(map[string]string{
			"app": "alertmanager-webhook",
		})).String(),
	}
	// CoreV1() for consistency with the other client calls in this file
	// (Core() is a deprecated alias).
	pl, err := framework.KubeClient.CoreV1().Pods(ns).List(opts)
	if err != nil {
		t.Fatal(err)
	}
	if len(pl.Items) != 1 {
		t.Fatalf("Expected one webhook pod, but got %d", len(pl.Items))
	}
	podName := pl.Items[0].Name

	// Exactly one notification must have reached the webhook so far.
	logs, err := testFramework.GetLogs(framework.KubeClient, ns, podName, "webhook-server")
	if err != nil {
		t.Fatal(err)
	}
	c := strings.Count(logs, "Alertmanager Notification Payload Received")
	if c != 1 {
		t.Fatalf("One notification expected, but %d received.\n\n%s", c, logs)
	}

	// Trigger a rolling update of the Alertmanager cluster and give it time
	// to complete; the notification count must stay at one (zero downtime, no
	// duplicate or repeated notifications).
	alertmanager.Spec.Version = "v0.14.0"
	if _, err := framework.MonClientV1.Alertmanagers(ns).Update(alertmanager); err != nil {
		t.Fatal(err)
	}
	time.Sleep(1 * time.Minute)

	logs, err = testFramework.GetLogs(framework.KubeClient, ns, podName, "webhook-server")
	if err != nil {
		t.Fatal(err)
	}
	c = strings.Count(logs, "Alertmanager Notification Payload Received")
	if c != 1 {
		t.Fatalf("Only one notification expected, but %d received after rolling update of Alertmanager cluster.\n\n%s", c, logs)
	}
}