mirror of
https://github.com/prometheus-operator/prometheus-operator.git
synced 2025-04-21 11:48:53 +00:00
Support Alertmanager v0.15.0
With Alertmanager v0.15.0, the HA command-line flag prefix changes from "mesh" to "cluster", and peer ports need to be specified. This patch enables running Alertmanager v0.15.0 with the Prometheus operator without breaking backward compatibility. In addition, it does the following refactoring: - Instead of generating the stateful set for an old AM version and then applying all necessary changes up to the newest version, generate an up-to-date stateful set and apply any changes necessary for backward compatibility. - Add unit tests for the various flag changes between Alertmanager versions. - Add a v0.15.0 mesh initialization test.
This commit is contained in:
parent
0079fb4e7f
commit
713dd05438
9 changed files with 251 additions and 42 deletions
Documentation
example/prometheus-operator-crd
pkg
test
|
@ -89,7 +89,7 @@ Specification of the desired behavior of the Alertmanager cluster. More info: ht
|
|||
| ----- | ----------- | ------ | -------- |
|
||||
| podMetadata | Standard object’s metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#metadata Metadata Labels and Annotations gets propagated to the prometheus pods. | *[metav1.ObjectMeta](https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#objectmeta-v1-meta) | false |
|
||||
| version | Version the cluster should be on. | string | false |
|
||||
| baseImage | Base image that is used to deploy pods. | string | false |
|
||||
| baseImage | Base image that is used to deploy pods, without tag. | string | false |
|
||||
| imagePullSecrets | An optional list of references to secrets in the same namespace to use for pulling prometheus and alertmanager images from registries see http://kubernetes.io/docs/user-guide/images#specifying-imagepullsecrets-on-a-pod | [][v1.LocalObjectReference](https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#localobjectreference-v1-core) | false |
|
||||
| replicas | Size is the expected size of the alertmanager cluster. The controller will eventually make the size of the running cluster equal to the expected size. | *int32 | false |
|
||||
| storage | Storage is the definition of how storage will be used by the Alertmanager instances. | *[StorageSpec](#storagespec) | false |
|
||||
|
|
|
@ -533,7 +533,7 @@ spec:
|
|||
type: string
|
||||
type: array
|
||||
baseImage:
|
||||
description: Base image that is used to deploy pods.
|
||||
description: Base image that is used to deploy pods, without tag.
|
||||
type: string
|
||||
containers:
|
||||
description: Containers allows injecting additional containers. This
|
||||
|
|
|
@ -168,29 +168,30 @@ func makeStatefulSetSpec(a *monitoringv1.Alertmanager, config Config) (*v1beta1.
|
|||
|
||||
version, err := semver.Parse(versionStr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "parse version")
|
||||
return nil, errors.Wrap(err, "failed to parse alertmanager version")
|
||||
}
|
||||
|
||||
amArgs := []string{
|
||||
fmt.Sprintf("-config.file=%s", alertmanagerConfFile),
|
||||
fmt.Sprintf("-mesh.listen-address=:%d", 6783),
|
||||
fmt.Sprintf("-storage.path=%s", alertmanagerStorageDir),
|
||||
fmt.Sprintf("--config.file=%s", alertmanagerConfFile),
|
||||
fmt.Sprintf("--cluster.listen-address=:%d", 6783),
|
||||
fmt.Sprintf("--storage.path=%s", alertmanagerStorageDir),
|
||||
}
|
||||
|
||||
if a.Spec.ListenLocal {
|
||||
amArgs = append(amArgs, "-web.listen-address=127.0.0.1:9093")
|
||||
amArgs = append(amArgs, "--web.listen-address=127.0.0.1:9093")
|
||||
} else {
|
||||
amArgs = append(amArgs, "-web.listen-address=:9093")
|
||||
amArgs = append(amArgs, "--web.listen-address=:9093")
|
||||
}
|
||||
|
||||
if a.Spec.ExternalURL != "" {
|
||||
amArgs = append(amArgs, "-web.external-url="+a.Spec.ExternalURL)
|
||||
amArgs = append(amArgs, "--web.external-url="+a.Spec.ExternalURL)
|
||||
}
|
||||
|
||||
webRoutePrefix := "/"
|
||||
if a.Spec.RoutePrefix != "" {
|
||||
webRoutePrefix = a.Spec.RoutePrefix
|
||||
}
|
||||
amArgs = append(amArgs, fmt.Sprintf("--web.route-prefix=%v", webRoutePrefix))
|
||||
|
||||
localReloadURL := &url.URL{
|
||||
Scheme: "http",
|
||||
|
@ -241,7 +242,7 @@ func makeStatefulSetSpec(a *monitoringv1.Alertmanager, config Config) (*v1beta1.
|
|||
podLabels["alertmanager"] = a.Name
|
||||
|
||||
for i := int32(0); i < *a.Spec.Replicas; i++ {
|
||||
amArgs = append(amArgs, fmt.Sprintf("-mesh.peer=%s-%d.%s.%s.svc", prefixedName(a.Name), i, governingServiceName, a.Namespace))
|
||||
amArgs = append(amArgs, fmt.Sprintf("--cluster.peer=%s-%d.%s.%s.svc:6783", prefixedName(a.Name), i, governingServiceName, a.Namespace))
|
||||
}
|
||||
|
||||
ports := []v1.ContainerPort{
|
||||
|
@ -275,17 +276,32 @@ func makeStatefulSetSpec(a *monitoringv1.Alertmanager, config Config) (*v1beta1.
|
|||
securityContext = a.Spec.SecurityContext
|
||||
}
|
||||
|
||||
// Adjust Alertmanager command line args to specified AM version
|
||||
switch version.Major {
|
||||
case 0:
|
||||
if version.Minor >= 7 {
|
||||
amArgs = append(amArgs, "-web.route-prefix="+webRoutePrefix)
|
||||
}
|
||||
if version.Minor >= 13 {
|
||||
if version.Minor < 15 {
|
||||
for i := range amArgs {
|
||||
// starting with v0.13.0 of Alertmanager all flags are with double dashes.
|
||||
amArgs[i] = "-" + amArgs[i]
|
||||
// below Alertmanager v0.15.0 peer address port specification is not necessary
|
||||
if strings.Contains(amArgs[i], "--cluster.peer") {
|
||||
amArgs[i] = strings.TrimSuffix(amArgs[i], ":6783")
|
||||
}
|
||||
|
||||
// below Alertmanager v0.15.0 high availability flags are prefixed with 'mesh' instead of 'cluster'
|
||||
amArgs[i] = strings.Replace(amArgs[i], "--cluster.", "--mesh.", 1)
|
||||
}
|
||||
}
|
||||
if version.Minor < 13 {
|
||||
for i := range amArgs {
|
||||
// below Alertmanager v0.13.0 all flags are with single dash.
|
||||
amArgs[i] = strings.Replace(amArgs[i], "--", "-", 1)
|
||||
}
|
||||
}
|
||||
if version.Minor < 7 {
|
||||
// below Alertmanager v0.7.0 the flag 'web.route-prefix' does not exist
|
||||
amArgs = filter(amArgs, func(s string) bool {
|
||||
return !strings.Contains(s, "web.route-prefix")
|
||||
})
|
||||
}
|
||||
default:
|
||||
return nil, errors.Errorf("unsupported Alertmanager major version %s", version)
|
||||
}
|
||||
|
@ -385,3 +401,13 @@ func subPathForStorage(s *monitoringv1.StorageSpec) string {
|
|||
|
||||
return "alertmanager-db"
|
||||
}
|
||||
|
||||
// filter returns a new slice holding, in their original order, the elements
// of ss for which keep returns true. The input slice is not modified, and the
// result is always non-nil, even when no element is kept.
func filter(ss []string, keep func(string) bool) []string {
	// The parameter is deliberately not called "strings": that name would
	// shadow the imported strings package inside this function.
	filtered := make([]string, 0, len(ss))
	for _, s := range ss {
		if keep(s) {
			filtered = append(filtered, s)
		}
	}
	return filtered
}
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
package alertmanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
@ -210,3 +211,152 @@ func TestListenLocal(t *testing.T) {
|
|||
t.Fatal("Alertmanager container should only have one port defined")
|
||||
}
|
||||
}
|
||||
|
||||
// below Alertmanager v0.13.0 all flags are with single dash.
|
||||
func TestMakeStatefulSetSpecSingleDoubleDashedArgs(t *testing.T) {
|
||||
tests := []struct {
|
||||
version string
|
||||
prefix string
|
||||
amount int
|
||||
}{
|
||||
{"v0.12.0", "-", 1},
|
||||
{"v0.13.0", "--", 2},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
a := monitoringv1.Alertmanager{}
|
||||
a.Spec.Version = test.version
|
||||
replicas := int32(3)
|
||||
a.Spec.Replicas = &replicas
|
||||
|
||||
statefulSet, err := makeStatefulSetSpec(&a, Config{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
amArgs := statefulSet.Template.Spec.Containers[0].Args
|
||||
|
||||
for _, arg := range amArgs {
|
||||
if arg[:test.amount] != test.prefix {
|
||||
t.Fatalf("expected all args to start with %v but got %v", test.prefix, arg)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// below Alertmanager v0.7.0 the flag 'web.route-prefix' does not exist
|
||||
func TestMakeStatefulSetSpecWebRoutePrefix(t *testing.T) {
|
||||
tests := []struct {
|
||||
version string
|
||||
expectWebRoutePrefix bool
|
||||
}{
|
||||
{"v0.6.0", false},
|
||||
{"v0.7.0", true},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
a := monitoringv1.Alertmanager{}
|
||||
a.Spec.Version = test.version
|
||||
replicas := int32(1)
|
||||
a.Spec.Replicas = &replicas
|
||||
|
||||
statefulSet, err := makeStatefulSetSpec(&a, Config{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
amArgs := statefulSet.Template.Spec.Containers[0].Args
|
||||
|
||||
containsWebRoutePrefix := false
|
||||
|
||||
for _, arg := range amArgs {
|
||||
if strings.Contains(arg, "-web.route-prefix") {
|
||||
containsWebRoutePrefix = true
|
||||
}
|
||||
}
|
||||
|
||||
if containsWebRoutePrefix != test.expectWebRoutePrefix {
|
||||
t.Fatalf("expected stateful set containing arg '-web.route-prefix' to be: %v", test.expectWebRoutePrefix)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// below Alertmanager v0.15.0 high availability flags are prefixed with 'mesh' instead of 'cluster'
|
||||
func TestMakeStatefulSetSpecMeshClusterFlags(t *testing.T) {
|
||||
tests := []struct {
|
||||
version string
|
||||
rightHAPrefix string
|
||||
wrongHAPrefix string
|
||||
}{
|
||||
{"v0.14.0", "mesh", "cluster"},
|
||||
{"v0.15.0", "cluster", "mesh"},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
a := monitoringv1.Alertmanager{}
|
||||
a.Spec.Version = test.version
|
||||
replicas := int32(3)
|
||||
a.Spec.Replicas = &replicas
|
||||
|
||||
statefulSet, err := makeStatefulSetSpec(&a, Config{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
haFlags := []string{"--%v.listen-address", "--%v.peer="}
|
||||
|
||||
amArgs := statefulSet.Template.Spec.Containers[0].Args
|
||||
|
||||
for _, flag := range haFlags {
|
||||
if sliceContains(amArgs, fmt.Sprintf(flag, test.wrongHAPrefix)) {
|
||||
t.Fatalf("expected Alertmanager args not to contain %v, but got %v", test.wrongHAPrefix, amArgs)
|
||||
}
|
||||
if !sliceContains(amArgs, fmt.Sprintf(flag, test.rightHAPrefix)) {
|
||||
t.Fatalf("expected Alertmanager args to contain %v, but got %v", test.rightHAPrefix, amArgs)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// below Alertmanager v0.15.0 peer address port specification is not necessary
|
||||
func TestMakeStatefulSetSpecPeerFlagPort(t *testing.T) {
|
||||
tests := []struct {
|
||||
version string
|
||||
portNeeded bool
|
||||
}{
|
||||
{"v0.14.0", false},
|
||||
{"v0.15.0", true},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
a := monitoringv1.Alertmanager{}
|
||||
a.Spec.Version = test.version
|
||||
replicas := int32(3)
|
||||
a.Spec.Replicas = &replicas
|
||||
|
||||
statefulSet, err := makeStatefulSetSpec(&a, Config{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
amArgs := statefulSet.Template.Spec.Containers[0].Args
|
||||
|
||||
for _, arg := range amArgs {
|
||||
if strings.Contains(arg, ".peer") {
|
||||
if strings.Contains(arg, ":6783") != test.portNeeded {
|
||||
t.Fatalf("expected arg '%v' containing port specification to be: %v", arg, test.portNeeded)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sliceContains reports whether at least one element of slice contains match
// as a substring. It returns false for an empty or nil slice.
func sliceContains(slice []string, match string) bool {
	for _, s := range slice {
		if strings.Contains(s, match) {
			// The first hit decides the result; no need to scan the rest.
			return true
		}
	}
	return false
}
|
||||
|
|
|
@ -213,7 +213,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
|
|||
},
|
||||
"baseImage": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Base image that is used to deploy pods.",
|
||||
Description: "Base image that is used to deploy pods, without tag.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
|
|
|
@ -396,7 +396,7 @@ type AlertmanagerSpec struct {
|
|||
PodMetadata *metav1.ObjectMeta `json:"podMetadata,omitempty"`
|
||||
// Version the cluster should be on.
|
||||
Version string `json:"version,omitempty"`
|
||||
// Base image that is used to deploy pods.
|
||||
// Base image that is used to deploy pods, without tag.
|
||||
BaseImage string `json:"baseImage,omitempty"`
|
||||
// An optional list of references to secrets in the same namespace
|
||||
// to use for pulling prometheus and alertmanager images from registries
|
||||
|
|
|
@ -130,28 +130,42 @@ func TestExposingAlertmanagerWithKubernetesAPI(t *testing.T) {
|
|||
func TestMeshInitialization(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := framework.NewTestCtx(t)
|
||||
defer ctx.Cleanup(t)
|
||||
ns := ctx.CreateNamespace(t, framework.KubeClient)
|
||||
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
|
||||
// Starting with Alertmanager v0.15.0 hashicorp/memberlist is used for HA.
|
||||
// Make sure both memberlist as well as mesh (< 0.15.0) work
|
||||
amVersions := []string{"v0.14.0", "v0.15.0-rc.0"}
|
||||
|
||||
amClusterSize := 3
|
||||
alertmanager := framework.MakeBasicAlertmanager("test", int32(amClusterSize))
|
||||
alertmanagerService := framework.MakeAlertmanagerService(alertmanager.Name, "alertmanager-service", v1.ServiceTypeClusterIP)
|
||||
for _, v := range amVersions {
|
||||
version := v
|
||||
t.Run(
|
||||
fmt.Sprintf("amVersion%v", strings.Replace(version, ".", "-", -1)),
|
||||
func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ctx := framework.NewTestCtx(t)
|
||||
defer ctx.Cleanup(t)
|
||||
ns := ctx.CreateNamespace(t, framework.KubeClient)
|
||||
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
|
||||
|
||||
if err := framework.CreateAlertmanagerAndWaitUntilReady(ns, alertmanager); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
amClusterSize := 3
|
||||
alertmanager := framework.MakeBasicAlertmanager("test", int32(amClusterSize))
|
||||
alertmanager.Spec.Version = version
|
||||
alertmanagerService := framework.MakeAlertmanagerService(alertmanager.Name, "alertmanager-service", v1.ServiceTypeClusterIP)
|
||||
|
||||
if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, alertmanagerService); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := framework.CreateAlertmanagerAndWaitUntilReady(ns, alertmanager); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for i := 0; i < amClusterSize; i++ {
|
||||
name := "alertmanager-" + alertmanager.Name + "-" + strconv.Itoa(i)
|
||||
if err := framework.WaitForAlertmanagerInitializedMesh(ns, name, amClusterSize); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, alertmanagerService); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for i := 0; i < amClusterSize; i++ {
|
||||
name := "alertmanager-" + alertmanager.Name + "-" + strconv.Itoa(i)
|
||||
if err := framework.WaitForAlertmanagerInitializedMesh(ns, name, amClusterSize); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -199,12 +199,12 @@ func amImage(version string) string {
|
|||
}
|
||||
|
||||
func (f *Framework) WaitForAlertmanagerInitializedMesh(ns, name string, amountPeers int) error {
|
||||
return wait.Poll(time.Second, time.Second*20, func() (bool, error) {
|
||||
return wait.Poll(time.Second, time.Minute*5, func() (bool, error) {
|
||||
amStatus, err := f.GetAlertmanagerConfig(ns, name)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if len(amStatus.Data.MeshStatus.Peers) == amountPeers {
|
||||
if amStatus.Data.getAmountPeers() == amountPeers {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
|
@ -249,10 +249,20 @@ type alertmanagerStatus struct {
|
|||
}
|
||||
|
||||
type alertmanagerStatusData struct {
|
||||
MeshStatus meshStatus `json:"meshStatus"`
|
||||
ConfigYAML string `json:"configYAML"`
|
||||
ClusterStatus *clusterStatus `json:"clusterStatus,omitempty"`
|
||||
MeshStatus *clusterStatus `json:"meshStatus,omitempty"`
|
||||
ConfigYAML string `json:"configYAML"`
|
||||
}
|
||||
|
||||
type meshStatus struct {
|
||||
// Starting from AM v0.15.0 'MeshStatus' is called 'ClusterStatus'
|
||||
func (s *alertmanagerStatusData) getAmountPeers() int {
|
||||
if s.MeshStatus != nil {
|
||||
return len(s.MeshStatus.Peers)
|
||||
} else {
|
||||
return len(s.ClusterStatus.Peers)
|
||||
}
|
||||
}
|
||||
|
||||
type clusterStatus struct {
|
||||
Peers []interface{} `json:"peers"`
|
||||
}
|
||||
|
|
|
@ -31,7 +31,16 @@ type TestCtx struct {
|
|||
type finalizerFn func() error
|
||||
|
||||
func (f *Framework) NewTestCtx(t *testing.T) TestCtx {
|
||||
prefix := strings.TrimPrefix(strings.ToLower(t.Name()), "test")
|
||||
// TestCtx is used among others for namespace names where '/' is forbidden
|
||||
prefix := strings.TrimPrefix(
|
||||
strings.Replace(
|
||||
strings.ToLower(t.Name()),
|
||||
"/",
|
||||
"-",
|
||||
-1,
|
||||
),
|
||||
"test",
|
||||
)
|
||||
|
||||
id := prefix + "-" + strconv.FormatInt(time.Now().Unix(), 10)
|
||||
return TestCtx{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue