Mirror of https://github.com/prometheus-operator/prometheus-operator.git, synced 2025-04-21 03:38:43 +00:00.
Merge pull request #3382 from sjentzsch/master
Allow for enabling Alertmanager HA cluster mode even when running with single replica
This commit is contained in:
commit
a9657237c4
9 changed files with 56 additions and 13 deletions
|
@ -166,6 +166,7 @@ AlertmanagerSpec is a specification of the desired behavior of the Alertmanager
|
|||
| additionalPeers | AdditionalPeers allows injecting a set of additional Alertmanagers to peer with to form a highly available cluster. | []string | false |
|
||||
| clusterAdvertiseAddress | ClusterAdvertiseAddress is the explicit address to advertise in cluster. Needs to be provided for non RFC1918 [1] (public) addresses. [1] RFC1918: https://tools.ietf.org/html/rfc1918 | string | false |
|
||||
| portName | Port name used for the pods and governing service. This defaults to web | string | false |
|
||||
| forceEnableClusterMode | ForceEnableClusterMode ensures Alertmanager does not deactivate the cluster mode when running with a single replica. Use case is e.g. spanning an Alertmanager cluster across Kubernetes clusters with a single replica in each. | bool | false |
|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
|
|
|
@ -1719,6 +1719,12 @@ spec:
|
|||
under. This is necessary to generate correct URLs. This is necessary
|
||||
if Alertmanager is not served from root of a DNS name.
|
||||
type: string
|
||||
forceEnableClusterMode:
|
||||
description: ForceEnableClusterMode ensures Alertmanager does not
|
||||
deactivate the cluster mode when running with a single replica.
|
||||
Use case is e.g. spanning an Alertmanager cluster across Kubernetes
|
||||
clusters with a single replica in each.
|
||||
type: boolean
|
||||
image:
|
||||
description: Image if specified has precedence over baseImage, tag
|
||||
and sha combinations. Specifying the version is still necessary
|
||||
|
|
|
@ -1719,6 +1719,12 @@ spec:
|
|||
under. This is necessary to generate correct URLs. This is necessary
|
||||
if Alertmanager is not served from root of a DNS name.
|
||||
type: string
|
||||
forceEnableClusterMode:
|
||||
description: ForceEnableClusterMode ensures Alertmanager does not
|
||||
deactivate the cluster mode when running with a single replica.
|
||||
Use case is e.g. spanning an Alertmanager cluster across Kubernetes
|
||||
clusters with a single replica in each.
|
||||
type: boolean
|
||||
image:
|
||||
description: Image if specified has precedence over baseImage, tag
|
||||
and sha combinations. Specifying the version is still necessary
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -232,7 +232,7 @@ func makeStatefulSetSpec(a *monitoringv1.Alertmanager, config Config) (*appsv1.S
|
|||
fmt.Sprintf("--data.retention=%s", a.Spec.Retention),
|
||||
}
|
||||
|
||||
if *a.Spec.Replicas == 1 {
|
||||
if *a.Spec.Replicas == 1 && !a.Spec.ForceEnableClusterMode {
|
||||
amArgs = append(amArgs, "--cluster.listen-address=")
|
||||
} else {
|
||||
amArgs = append(amArgs, "--cluster.listen-address=[$(POD_IP)]:9094")
|
||||
|
|
|
@ -672,6 +672,33 @@ func TestClusterListenAddressForSingleReplica(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestClusterListenAddressForSingleReplicaWithForceEnableClusterMode(t *testing.T) {
|
||||
a := monitoringv1.Alertmanager{}
|
||||
replicas := int32(1)
|
||||
a.Spec.Version = operator.DefaultAlertmanagerVersion
|
||||
a.Spec.Replicas = &replicas
|
||||
a.Spec.ForceEnableClusterMode = true
|
||||
|
||||
statefulSet, err := makeStatefulSetSpec(&a, defaultTestConfig)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
amArgs := statefulSet.Template.Spec.Containers[0].Args
|
||||
|
||||
containsEmptyClusterListenAddress := false
|
||||
|
||||
for _, arg := range amArgs {
|
||||
if arg == "--cluster.listen-address=" {
|
||||
containsEmptyClusterListenAddress = true
|
||||
}
|
||||
}
|
||||
|
||||
if containsEmptyClusterListenAddress {
|
||||
t.Fatal("expected stateful set to not contain arg '--cluster.listen-address='")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClusterListenAddressForMultiReplica(t *testing.T) {
|
||||
a := monitoringv1.Alertmanager{}
|
||||
replicas := int32(3)
|
||||
|
|
|
@ -1187,6 +1187,9 @@ type AlertmanagerSpec struct {
|
|||
// Port name used for the pods and governing service.
|
||||
// This defaults to web
|
||||
PortName string `json:"portName,omitempty"`
|
||||
// ForceEnableClusterMode ensures Alertmanager does not deactivate the cluster mode when running with a single replica.
|
||||
// Use case is e.g. spanning an Alertmanager cluster across Kubernetes clusters with a single replica in each.
|
||||
ForceEnableClusterMode bool `json:"forceEnableClusterMode,omitempty"`
|
||||
}
|
||||
|
||||
// AlertmanagerList is a list of Alertmanagers.
|
||||
|
|
|
@ -240,7 +240,7 @@ func testAMClusterInitialization(t *testing.T) {
|
|||
|
||||
for i := 0; i < amClusterSize; i++ {
|
||||
name := "alertmanager-" + alertmanager.Name + "-" + strconv.Itoa(i)
|
||||
if err := framework.WaitForAlertmanagerInitialized(ns, name, amClusterSize); err != nil {
|
||||
if err := framework.WaitForAlertmanagerInitialized(ns, name, amClusterSize, alertmanager.Spec.ForceEnableClusterMode); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
@ -268,7 +268,7 @@ func testAMClusterAfterRollingUpdate(t *testing.T) {
|
|||
|
||||
for i := 0; i < amClusterSize; i++ {
|
||||
name := "alertmanager-" + alertmanager.Name + "-" + strconv.Itoa(i)
|
||||
if err := framework.WaitForAlertmanagerInitialized(ns, name, amClusterSize); err != nil {
|
||||
if err := framework.WaitForAlertmanagerInitialized(ns, name, amClusterSize, alertmanager.Spec.ForceEnableClusterMode); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
@ -283,7 +283,7 @@ func testAMClusterAfterRollingUpdate(t *testing.T) {
|
|||
|
||||
for i := 0; i < amClusterSize; i++ {
|
||||
name := "alertmanager-" + alertmanager.Name + "-" + strconv.Itoa(i)
|
||||
if err := framework.WaitForAlertmanagerInitialized(ns, name, amClusterSize); err != nil {
|
||||
if err := framework.WaitForAlertmanagerInitialized(ns, name, amClusterSize, alertmanager.Spec.ForceEnableClusterMode); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
@ -306,7 +306,7 @@ func testAMClusterGossipSilences(t *testing.T) {
|
|||
|
||||
for i := 0; i < amClusterSize; i++ {
|
||||
name := "alertmanager-" + alertmanager.Name + "-" + strconv.Itoa(i)
|
||||
if err := framework.WaitForAlertmanagerInitialized(ns, name, amClusterSize); err != nil {
|
||||
if err := framework.WaitForAlertmanagerInitialized(ns, name, amClusterSize, alertmanager.Spec.ForceEnableClusterMode); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
@ -610,7 +610,7 @@ inhibit_rules:
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := framework.WaitForAlertmanagerReady(ns, alertmanager.Name, int(*alertmanager.Spec.Replicas)); err != nil {
|
||||
if err := framework.WaitForAlertmanagerReady(ns, alertmanager.Name, int(*alertmanager.Spec.Replicas), alertmanager.Spec.ForceEnableClusterMode); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
|
@ -699,7 +699,7 @@ inhibit_rules:
|
|||
// Wait for the change above to take effect.
|
||||
time.Sleep(time.Minute)
|
||||
|
||||
if err := framework.WaitForAlertmanagerReady(ns, alertmanager.Name, int(*alertmanager.Spec.Replicas)); err != nil {
|
||||
if err := framework.WaitForAlertmanagerReady(ns, alertmanager.Name, int(*alertmanager.Spec.Replicas), alertmanager.Spec.ForceEnableClusterMode); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
|
|
|
@ -130,12 +130,12 @@ func (f *Framework) CreateAlertmanagerAndWaitUntilReady(ns string, a *monitoring
|
|||
return nil, errors.Wrap(err, fmt.Sprintf("creating alertmanager %v failed", a.Name))
|
||||
}
|
||||
|
||||
return a, f.WaitForAlertmanagerReady(ns, a.Name, int(*a.Spec.Replicas))
|
||||
return a, f.WaitForAlertmanagerReady(ns, a.Name, int(*a.Spec.Replicas), a.Spec.ForceEnableClusterMode)
|
||||
}
|
||||
|
||||
// WaitForAlertmanagerReady waits for each individual pod as well as the
|
||||
// cluster as a whole to be ready.
|
||||
func (f *Framework) WaitForAlertmanagerReady(ns, name string, replicas int) error {
|
||||
func (f *Framework) WaitForAlertmanagerReady(ns, name string, replicas int, forceEnableClusterMode bool) error {
|
||||
if err := WaitForPodsReady(
|
||||
f.KubeClient,
|
||||
ns,
|
||||
|
@ -152,7 +152,7 @@ func (f *Framework) WaitForAlertmanagerReady(ns, name string, replicas int) erro
|
|||
|
||||
for i := 0; i < replicas; i++ {
|
||||
name := fmt.Sprintf("alertmanager-%v-%v", name, strconv.Itoa(i))
|
||||
if err := f.WaitForAlertmanagerInitialized(ns, name, replicas); err != nil {
|
||||
if err := f.WaitForAlertmanagerInitialized(ns, name, replicas, forceEnableClusterMode); err != nil {
|
||||
return errors.Wrap(err,
|
||||
fmt.Sprintf(
|
||||
"failed to wait for an Alertmanager cluster (%s) with %d instances to become ready",
|
||||
|
@ -212,7 +212,7 @@ func amImage(version string) string {
|
|||
return fmt.Sprintf("quay.io/prometheus/alertmanager:%s", version)
|
||||
}
|
||||
|
||||
func (f *Framework) WaitForAlertmanagerInitialized(ns, name string, amountPeers int) error {
|
||||
func (f *Framework) WaitForAlertmanagerInitialized(ns, name string, amountPeers int, forceEnableClusterMode bool) error {
|
||||
var pollError error
|
||||
err := wait.Poll(time.Second, time.Minute*5, func() (bool, error) {
|
||||
amStatus, err := f.GetAlertmanagerStatus(ns, name)
|
||||
|
@ -220,7 +220,7 @@ func (f *Framework) WaitForAlertmanagerInitialized(ns, name string, amountPeers
|
|||
return false, err
|
||||
}
|
||||
|
||||
isAlertmanagerInClusterMode := amountPeers > 1
|
||||
isAlertmanagerInClusterMode := amountPeers > 1 || forceEnableClusterMode
|
||||
if !isAlertmanagerInClusterMode && amStatus.Status == "success" {
|
||||
return true, nil
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue