1
0
Fork 0
mirror of https://github.com/prometheus-operator/prometheus-operator.git synced 2025-04-21 03:38:43 +00:00

feat: add serviceName option to ThanosRuler and AlertManager CRD ()

This commit is contained in:
Thomas Rouaux 2025-02-21 17:01:25 +01:00 committed by GitHub
parent 61e898365b
commit 4f32e47eed
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 470 additions and 201 deletions

View file

@ -515,6 +515,22 @@ PodDNSConfig
</tr>
<tr>
<td>
<code>serviceName</code><br/>
<em>
string
</em>
</td>
<td>
<em>(Optional)</em>
<p>The name of the service name used by the underlying StatefulSet(s) as the governing service.
If defined, the Service must be created before the Alertmanager resource in the same namespace and it must define a selector that matches the pod labels.
If empty, the operator will create and manage a headless service named <code>alertmanager-operated</code> for Alertmanager resources.
When deploying multiple Alertmanager resources in the same namespace, it is recommended to specify a different value for each.
See <a href="https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id">https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id</a> for more details.</p>
</td>
</tr>
<tr>
<td>
<code>serviceAccountName</code><br/>
<em>
string
@ -4234,6 +4250,22 @@ string
</tr>
<tr>
<td>
<code>serviceName</code><br/>
<em>
string
</em>
</td>
<td>
<em>(Optional)</em>
<p>The name of the service name used by the underlying StatefulSet(s) as the governing service.
If defined, the Service must be created before the ThanosRuler resource in the same namespace and it must define a selector that matches the pod labels.
If empty, the operator will create and manage a headless service named <code>thanos-ruler-operated</code> for ThanosRuler resources.
When deploying multiple ThanosRuler resources in the same namespace, it is recommended to specify a different value for each.
See <a href="https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id">https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id</a> for more details.</p>
</td>
</tr>
<tr>
<td>
<code>serviceAccountName</code><br/>
<em>
string
@ -5929,6 +5961,22 @@ PodDNSConfig
</tr>
<tr>
<td>
<code>serviceName</code><br/>
<em>
string
</em>
</td>
<td>
<em>(Optional)</em>
<p>The name of the service name used by the underlying StatefulSet(s) as the governing service.
If defined, the Service must be created before the Alertmanager resource in the same namespace and it must define a selector that matches the pod labels.
If empty, the operator will create and manage a headless service named <code>alertmanager-operated</code> for Alertmanager resources.
When deploying multiple Alertmanager resources in the same namespace, it is recommended to specify a different value for each.
See <a href="https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id">https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id</a> for more details.</p>
</td>
</tr>
<tr>
<td>
<code>serviceAccountName</code><br/>
<em>
string
@ -16723,6 +16771,22 @@ string
</tr>
<tr>
<td>
<code>serviceName</code><br/>
<em>
string
</em>
</td>
<td>
<em>(Optional)</em>
<p>The name of the service name used by the underlying StatefulSet(s) as the governing service.
If defined, the Service must be created before the ThanosRuler resource in the same namespace and it must define a selector that matches the pod labels.
If empty, the operator will create and manage a headless service named <code>thanos-ruler-operated</code> for ThanosRuler resources.
When deploying multiple ThanosRuler resources in the same namespace, it is recommended to specify a different value for each.
See <a href="https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id">https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id</a> for more details.</p>
</td>
</tr>
<tr>
<td>
<code>serviceAccountName</code><br/>
<em>
string

18
bundle.yaml generated
View file

@ -15586,6 +15586,15 @@ spec:
ServiceAccountName is the name of the ServiceAccount to use to run the
Prometheus Pods.
type: string
serviceName:
description: |-
The name of the service name used by the underlying StatefulSet(s) as the governing service.
If defined, the Service must be created before the Alertmanager resource in the same namespace and it must define a selector that matches the pod labels.
If empty, the operator will create and manage a headless service named `alertmanager-operated` for Alertmanager resources.
When deploying multiple Alertmanager resources in the same namespace, it is recommended to specify a different value for each.
See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id for more details.
minLength: 1
type: string
sha:
description: |-
SHA of Alertmanager container image to be deployed. Defaults to the value of `version`.
@ -63901,6 +63910,15 @@ spec:
ServiceAccountName is the name of the ServiceAccount to use to run the
Thanos Ruler Pods.
type: string
serviceName:
description: |-
The name of the service name used by the underlying StatefulSet(s) as the governing service.
If defined, the Service must be created before the ThanosRuler resource in the same namespace and it must define a selector that matches the pod labels.
If empty, the operator will create and manage a headless service named `thanos-ruler-operated` for ThanosRuler resources.
When deploying multiple ThanosRuler resources in the same namespace, it is recommended to specify a different value for each.
See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id for more details.
minLength: 1
type: string
storage:
description: Storage spec to specify how storage shall be used.
properties:

View file

@ -5547,6 +5547,15 @@ spec:
ServiceAccountName is the name of the ServiceAccount to use to run the
Prometheus Pods.
type: string
serviceName:
description: |-
The name of the service name used by the underlying StatefulSet(s) as the governing service.
If defined, the Service must be created before the Alertmanager resource in the same namespace and it must define a selector that matches the pod labels.
If empty, the operator will create and manage a headless service named `alertmanager-operated` for Alertmanager resources.
When deploying multiple Alertmanager resources in the same namespace, it is recommended to specify a different value for each.
See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id for more details.
minLength: 1
type: string
sha:
description: |-
SHA of Alertmanager container image to be deployed. Defaults to the value of `version`.

View file

@ -4940,6 +4940,15 @@ spec:
ServiceAccountName is the name of the ServiceAccount to use to run the
Thanos Ruler Pods.
type: string
serviceName:
description: |-
The name of the service name used by the underlying StatefulSet(s) as the governing service.
If defined, the Service must be created before the ThanosRuler resource in the same namespace and it must define a selector that matches the pod labels.
If empty, the operator will create and manage a headless service named `thanos-ruler-operated` for ThanosRuler resources.
When deploying multiple ThanosRuler resources in the same namespace, it is recommended to specify a different value for each.
See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id for more details.
minLength: 1
type: string
storage:
description: Storage spec to specify how storage shall be used.
properties:

View file

@ -5548,6 +5548,15 @@ spec:
ServiceAccountName is the name of the ServiceAccount to use to run the
Prometheus Pods.
type: string
serviceName:
description: |-
The name of the service name used by the underlying StatefulSet(s) as the governing service.
If defined, the Service must be created before the Alertmanager resource in the same namespace and it must define a selector that matches the pod labels.
If empty, the operator will create and manage a headless service named `alertmanager-operated` for Alertmanager resources.
When deploying multiple Alertmanager resources in the same namespace, it is recommended to specify a different value for each.
See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id for more details.
minLength: 1
type: string
sha:
description: |-
SHA of Alertmanager container image to be deployed. Defaults to the value of `version`.

View file

@ -4941,6 +4941,15 @@ spec:
ServiceAccountName is the name of the ServiceAccount to use to run the
Thanos Ruler Pods.
type: string
serviceName:
description: |-
The name of the service name used by the underlying StatefulSet(s) as the governing service.
If defined, the Service must be created before the ThanosRuler resource in the same namespace and it must define a selector that matches the pod labels.
If empty, the operator will create and manage a headless service named `thanos-ruler-operated` for ThanosRuler resources.
When deploying multiple ThanosRuler resources in the same namespace, it is recommended to specify a different value for each.
See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id for more details.
minLength: 1
type: string
storage:
description: Storage spec to specify how storage shall be used.
properties:

View file

@ -4858,6 +4858,11 @@
"description": "ServiceAccountName is the name of the ServiceAccount to use to run the\nPrometheus Pods.",
"type": "string"
},
"serviceName": {
"description": "The name of the service name used by the underlying StatefulSet(s) as the governing service.\nIf defined, the Service must be created before the Alertmanager resource in the same namespace and it must define a selector that matches the pod labels.\nIf empty, the operator will create and manage a headless service named `alertmanager-operated` for Alertmanager resources.\nWhen deploying multiple Alertmanager resources in the same namespace, it is recommended to specify a different value for each.\nSee https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id for more details.",
"minLength": 1,
"type": "string"
},
"sha": {
"description": "SHA of Alertmanager container image to be deployed. Defaults to the value of `version`.\nSimilar to a tag, but the SHA explicitly deploys an immutable container image.\nVersion and Tag are ignored if SHA is set.\nDeprecated: use 'image' instead. The image digest can be specified as part of the image URL.",
"type": "string"

View file

@ -4287,6 +4287,11 @@
"description": "ServiceAccountName is the name of the ServiceAccount to use to run the\nThanos Ruler Pods.",
"type": "string"
},
"serviceName": {
"description": "The name of the service name used by the underlying StatefulSet(s) as the governing service.\nIf defined, the Service must be created before the ThanosRuler resource in the same namespace and it must define a selector that matches the pod labels.\nIf empty, the operator will create and manage a headless service named `thanos-ruler-operated` for ThanosRuler resources.\nWhen deploying multiple ThanosRuler resources in the same namespace, it is recommended to specify a different value for each.\nSee https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id for more details.",
"minLength": 1,
"type": "string"
},
"storage": {
"description": "Storage spec to specify how storage shall be used.",
"properties": {

View file

@ -562,10 +562,17 @@ func (c *Operator) sync(ctx context.Context, key string) error {
return fmt.Errorf("synchronizing web config secret failed: %w", err)
}
// Create governing service if it doesn't exist.
svcClient := c.kclient.CoreV1().Services(am.Namespace)
if _, err = k8sutil.CreateOrUpdateService(ctx, svcClient, makeStatefulSetService(am, c.config)); err != nil {
return fmt.Errorf("synchronizing governing service failed: %w", err)
if am.Spec.ServiceName != nil {
selectorLabels := makeSelectorLabels(am.Name)
if err := k8sutil.EnsureCustomGoverningService(ctx, am.Namespace, *am.Spec.ServiceName, svcClient, selectorLabels); err != nil {
return err
}
} else {
// Create governing service if it doesn't exist.
if _, err = k8sutil.CreateOrUpdateService(ctx, svcClient, makeStatefulSetService(am, c.config)); err != nil {
return fmt.Errorf("synchronizing governing service failed: %w", err)
}
}
existingStatefulSet, err := c.getStatefulSetFromAlertmanagerKey(key)

View file

@ -717,7 +717,7 @@ func makeStatefulSetSpec(logger *slog.Logger, a *monitoringv1.Alertmanager, conf
}
spec := appsv1.StatefulSetSpec{
ServiceName: governingServiceName,
ServiceName: ptr.Deref(a.Spec.ServiceName, governingServiceName),
Replicas: a.Spec.Replicas,
MinReadySeconds: minReadySeconds,
// PodManagementPolicy is set to Parallel to mitigate issues in kubernetes: https://github.com/kubernetes/kubernetes/issues/60164

View file

@ -193,6 +193,14 @@ type AlertmanagerSpec struct {
//
// +optional
DNSConfig *PodDNSConfig `json:"dnsConfig,omitempty"`
// The name of the service used by the underlying StatefulSet(s) as the governing service.
// If defined, the Service must be created before the Alertmanager resource in the same namespace and it must define a selector that matches the pod labels.
// If empty, the operator will create and manage a headless service named `alertmanager-operated` for Alertmanager resources.
// When deploying multiple Alertmanager resources in the same namespace, it is recommended to specify a different value for each.
// See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id for more details.
// +optional
// +kubebuilder:validation:MinLength=1
ServiceName *string `json:"serviceName,omitempty"`
// ServiceAccountName is the name of the ServiceAccount to use to run the
// Prometheus Pods.
ServiceAccountName string `json:"serviceAccountName,omitempty"`

View file

@ -140,6 +140,15 @@ type ThanosRulerSpec struct {
// Priority class assigned to the Pods
PriorityClassName string `json:"priorityClassName,omitempty"`
// The name of the service used by the underlying StatefulSet(s) as the governing service.
// If defined, the Service must be created before the ThanosRuler resource in the same namespace and it must define a selector that matches the pod labels.
// If empty, the operator will create and manage a headless service named `thanos-ruler-operated` for ThanosRuler resources.
// When deploying multiple ThanosRuler resources in the same namespace, it is recommended to specify a different value for each.
// See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-network-id for more details.
// +optional
// +kubebuilder:validation:MinLength=1
ServiceName *string `json:"serviceName,omitempty"`
// ServiceAccountName is the name of the ServiceAccount to use to run the
// Thanos Ruler Pods.
ServiceAccountName string `json:"serviceAccountName,omitempty"`

View file

@ -377,6 +377,11 @@ func (in *AlertmanagerSpec) DeepCopyInto(out *AlertmanagerSpec) {
*out = new(PodDNSConfig)
(*in).DeepCopyInto(*out)
}
if in.ServiceName != nil {
in, out := &in.ServiceName, &out.ServiceName
*out = new(string)
**out = **in
}
if in.Containers != nil {
in, out := &in.Containers, &out.Containers
*out = make([]corev1.Container, len(*in))
@ -3390,6 +3395,11 @@ func (in *ThanosRulerSpec) DeepCopyInto(out *ThanosRulerSpec) {
*out = new(PodDNSConfig)
(*in).DeepCopyInto(*out)
}
if in.ServiceName != nil {
in, out := &in.ServiceName, &out.ServiceName
*out = new(string)
**out = **in
}
if in.Storage != nil {
in, out := &in.Storage, &out.Storage
*out = new(StorageSpec)

View file

@ -56,6 +56,7 @@ type AlertmanagerSpecApplyConfiguration struct {
SecurityContext *corev1.PodSecurityContext `json:"securityContext,omitempty"`
DNSPolicy *monitoringv1.DNSPolicy `json:"dnsPolicy,omitempty"`
DNSConfig *PodDNSConfigApplyConfiguration `json:"dnsConfig,omitempty"`
ServiceName *string `json:"serviceName,omitempty"`
ServiceAccountName *string `json:"serviceAccountName,omitempty"`
ListenLocal *bool `json:"listenLocal,omitempty"`
Containers []corev1.Container `json:"containers,omitempty"`
@ -346,6 +347,14 @@ func (b *AlertmanagerSpecApplyConfiguration) WithDNSConfig(value *PodDNSConfigAp
return b
}
// WithServiceName stores the given value in the ServiceName field of the
// declarative configuration and hands back the receiver, which allows "With"
// calls to be chained. A later call overwrites the value set by an earlier one.
func (b *AlertmanagerSpecApplyConfiguration) WithServiceName(value string) *AlertmanagerSpecApplyConfiguration {
	serviceName := value
	b.ServiceName = &serviceName
	return b
}
// WithServiceAccountName sets the ServiceAccountName field in the declarative configuration to the given value
// and returns the receiver, so that objects can be built by chaining "With" function invocations.
// If called multiple times, the ServiceAccountName field is set to the value of the last call.

View file

@ -41,6 +41,7 @@ type ThanosRulerSpecApplyConfiguration struct {
DNSPolicy *monitoringv1.DNSPolicy `json:"dnsPolicy,omitempty"`
DNSConfig *PodDNSConfigApplyConfiguration `json:"dnsConfig,omitempty"`
PriorityClassName *string `json:"priorityClassName,omitempty"`
ServiceName *string `json:"serviceName,omitempty"`
ServiceAccountName *string `json:"serviceAccountName,omitempty"`
Storage *StorageSpecApplyConfiguration `json:"storage,omitempty"`
Volumes []corev1.Volume `json:"volumes,omitempty"`
@ -226,6 +227,14 @@ func (b *ThanosRulerSpecApplyConfiguration) WithPriorityClassName(value string)
return b
}
// WithServiceName stores the given value in the ServiceName field of the
// declarative configuration and hands back the receiver, which allows "With"
// calls to be chained. A later call overwrites the value set by an earlier one.
func (b *ThanosRulerSpecApplyConfiguration) WithServiceName(value string) *ThanosRulerSpecApplyConfiguration {
	serviceName := value
	b.ServiceName = &serviceName
	return b
}
// WithServiceAccountName sets the ServiceAccountName field in the declarative configuration to the given value
// and returns the receiver, so that objects can be built by chaining "With" function invocations.
// If called multiple times, the ServiceAccountName field is set to the value of the last call.

View file

@ -33,6 +33,7 @@ import (
apiequality "k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@ -566,3 +567,32 @@ func UpdateDNSPolicy(podSpec *v1.PodSpec, dnsPolicy *monitoringv1.DNSPolicy) {
podSpec.DNSPolicy = v1.DNSPolicy(*dnsPolicy)
}
// EnsureCustomGoverningService validates a user-provided governing service.
//
// It returns an error (which callers use to fail the reconciliation) when:
//   - the service named serviceName cannot be retrieved through svcClient,
//   - the service doesn't define any selector,
//   - the service's selector doesn't match selectorLabels.
//
// namespace is only used to build the error messages; callers are expected to
// pass a svcClient which is already scoped to that namespace.
func EnsureCustomGoverningService(ctx context.Context, namespace string, serviceName string, svcClient clientv1.ServiceInterface, selectorLabels map[string]string) error {
	// Check that the custom governing service exists in the namespace served by svcClient.
	svc, err := svcClient.Get(ctx, serviceName, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("failed to get custom governing service %s/%s: %w", namespace, serviceName, err)
	}

	// metav1.LabelSelectorAsSelector() converts an empty label selector into
	// a selector matching everything while a Service without selector
	// doesn't select any pod. Reject it explicitly instead of reporting a
	// false match.
	if len(svc.Spec.Selector) == 0 {
		return fmt.Errorf("custom governing service %s/%s does not define a selector", namespace, serviceName)
	}

	svcSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: svc.Spec.Selector})
	if err != nil {
		return fmt.Errorf("failed to parse the selector labels for custom governing service %s/%s: %w", namespace, serviceName, err)
	}

	// Check that the service selects the pods carrying the expected labels.
	if !svcSelector.Matches(labels.Set(selectorLabels)) {
		return fmt.Errorf("custom governing service %s/%s with selector %q does not select pods with labels %q",
			namespace, serviceName, svcSelector.String(), labels.Set(selectorLabels).String())
	}

	return nil
}

View file

@ -555,3 +555,129 @@ func TestConvertToK8sDNSConfig(t *testing.T) {
require.Equal(t, opt.Value, spec.DNSConfig.Options[i].Value, "expected option values to match")
}
}
// TestEnsureCustomGoverningService verifies that EnsureCustomGoverningService
// only succeeds when the service exists in the expected namespace and its
// selector matches the provided labels.
func TestEnsureCustomGoverningService(t *testing.T) {
	const (
		name = "test-k8sutil"
		ns   = "test-ns"
	)
	serviceName := "test-svc"

	// labelsFor returns the label set identifying the instance called n.
	labelsFor := func(n string) map[string]string {
		return map[string]string{
			"k8sutil":                      n,
			"app.kubernetes.io/name":       "k8sutil",
			"app.kubernetes.io/instance":   n,
			"app.kubernetes.io/managed-by": "prometheus-operator",
		}
	}

	// newService returns a service living in svcNS which selects the
	// instance called n.
	newService := func(svcNS, n string) v1.Service {
		return v1.Service{
			ObjectMeta: metav1.ObjectMeta{
				Name:      serviceName,
				Namespace: svcNS,
			},
			Spec: v1.ServiceSpec{
				Selector: labelsFor(n),
			},
		}
	}

	for _, tc := range []struct {
		name           string
		service        v1.Service
		selectorLabels map[string]string
		expectedErr    bool
	}{
		{
			name:           "custom service selects k8sutil",
			service:        newService(ns, name),
			selectorLabels: labelsFor(name),
		},
		{
			name:           "custom service does not select k8sutil",
			service:        newService(ns, "different-name"),
			selectorLabels: labelsFor(name),
			expectedErr:    true,
		},
		{
			name:           "custom service selects k8sutil but in different ns",
			service:        newService("wrong-ns", name),
			selectorLabels: labelsFor(name),
			expectedErr:    true,
		},
		{
			// The zero-value service never matches the requested name.
			name:           "custom svc doesn't exist",
			selectorLabels: labelsFor(name),
			expectedErr:    true,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			p := makeBarebonesPrometheus(name, ns)
			p.Spec.ServiceName = &serviceName

			svcClient := fake.NewSimpleClientset(&tc.service).CoreV1().Services(ns)

			err := EnsureCustomGoverningService(context.Background(), p.Namespace, *p.Spec.ServiceName, svcClient, tc.selectorLabels)
			if !tc.expectedErr {
				require.NoError(t, err)
				return
			}

			require.Error(t, err)
		})
	}
}
// makeBarebonesPrometheus returns a Prometheus object with the given name and
// namespace, an empty annotation map and a single replica.
func makeBarebonesPrometheus(name, ns string) *monitoringv1.Prometheus {
	var prom monitoringv1.Prometheus

	prom.ObjectMeta = metav1.ObjectMeta{
		Name:        name,
		Namespace:   ns,
		Annotations: map[string]string{},
	}
	prom.Spec.CommonPrometheusFields.Replicas = ptr.To(int32(1))

	return &prom
}

View file

@ -696,7 +696,7 @@ func (c *Operator) syncStatefulSet(ctx context.Context, key string, p *monitorin
svcClient := c.kclient.CoreV1().Services(p.Namespace)
selectorLabels := makeSelectorLabels(p.Name)
if err := prompkg.EnsureCustomGoverningService(ctx, p.Namespace, *p.Spec.ServiceName, svcClient, selectorLabels); err != nil {
if err := k8sutil.EnsureCustomGoverningService(ctx, p.Namespace, *p.Spec.ServiceName, svcClient, selectorLabels); err != nil {
return err
}
} else {

View file

@ -298,7 +298,7 @@ func makeStatefulSetSpec(
// PodManagementPolicy is set to Parallel to mitigate issues in kubernetes: https://github.com/kubernetes/kubernetes/issues/60164
// This is also mentioned as one of limitations of StatefulSets: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#limitations
return &appsv1.StatefulSetSpec{
ServiceName: governingServiceName,
ServiceName: ptr.Deref(cpf.ServiceName, governingServiceName),
Replicas: cpf.Replicas,
PodManagementPolicy: appsv1.ParallelPodManagement,
UpdateStrategy: appsv1.StatefulSetUpdateStrategy{

View file

@ -16,7 +16,6 @@ package prometheus
import (
"bytes"
"context"
"fmt"
"net/url"
"path"
@ -24,9 +23,7 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/intstr"
clientv1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/utils/ptr"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
@ -490,32 +487,3 @@ func BuildStatefulSetService(name string, selector map[string]string, p monitori
return svc
}
// EnsureCustomGoverningService validates a user-provided governing service
// for a Prometheus/PrometheusAgent resource.
//
// It returns an error (which callers use to fail the reconciliation) when:
//   - the service named serviceName cannot be retrieved through svcClient,
//   - the service doesn't define any selector,
//   - the service's selector doesn't match selectorLabels.
//
// namespace is only used to build the error messages; callers are expected to
// pass a svcClient which is already scoped to that namespace.
func EnsureCustomGoverningService(ctx context.Context, namespace string, serviceName string, svcClient clientv1.ServiceInterface, selectorLabels map[string]string) error {
	// Check that the custom governing service exists in the namespace served by svcClient.
	svc, err := svcClient.Get(ctx, serviceName, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("failed to get custom governing service %s/%s: %w", namespace, serviceName, err)
	}

	// metav1.LabelSelectorAsSelector() converts an empty label selector into
	// a selector matching everything while a Service without selector
	// doesn't select any pod. Reject it explicitly instead of reporting a
	// false match.
	if len(svc.Spec.Selector) == 0 {
		return fmt.Errorf("custom governing service %s/%s does not define a selector", namespace, serviceName)
	}

	svcSelector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{MatchLabels: svc.Spec.Selector})
	if err != nil {
		return fmt.Errorf("failed to parse the selector labels for custom governing service %s/%s: %w", namespace, serviceName, err)
	}

	// Check that the service selects the Prometheus/PrometheusAgent pods.
	if !svcSelector.Matches(labels.Set(selectorLabels)) {
		return fmt.Errorf("custom governing service %s/%s with selector %q does not select Prometheus/PrometheusAgent pods with labels %q",
			namespace, serviceName, svcSelector.String(), labels.Set(selectorLabels).String())
	}

	return nil
}

View file

@ -15,14 +15,11 @@
package prometheus
import (
"context"
"strings"
"testing"
"github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/utils/ptr"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
@ -285,153 +282,3 @@ func TestBuildCommonPrometheusArgsWithOTLPReceiver(t *testing.T) {
})
}
}
// TestEnsureCustomGoverningService verifies that EnsureCustomGoverningService
// only succeeds when the service exists in the expected namespace and its
// selector matches the Prometheus labels.
func TestEnsureCustomGoverningService(t *testing.T) {
	name := "test-prometheus"
	serviceName := "test-svc"
	ns := "test-ns"

	// promLabels returns the label set identifying the Prometheus instance
	// called n.
	promLabels := func(n string) map[string]string {
		return map[string]string{
			"prometheus":                   n,
			"app.kubernetes.io/name":       "prometheus",
			"app.kubernetes.io/instance":   n,
			"app.kubernetes.io/managed-by": "prometheus-operator",
		}
	}

	// newService returns a service living in svcNS which selects the
	// Prometheus instance called n.
	newService := func(svcNS, n string) v1.Service {
		return v1.Service{
			ObjectMeta: metav1.ObjectMeta{
				Name:      serviceName,
				Namespace: svcNS,
			},
			Spec: v1.ServiceSpec{
				Selector: promLabels(n),
			},
		}
	}

	// Each scenario is listed exactly once so that subtest names stay unique.
	testcases := []struct {
		name           string
		service        v1.Service
		selectorLabels map[string]string
		expectedErr    bool
	}{
		{
			name:           "custom service selects prometheus",
			service:        newService(ns, name),
			selectorLabels: promLabels(name),
			expectedErr:    false,
		},
		{
			name:           "custom service does not select prometheus",
			service:        newService(ns, "different-name"),
			selectorLabels: promLabels(name),
			expectedErr:    true,
		},
		{
			name:           "custom service selects prometheus but in different ns",
			service:        newService("wrong-ns", name),
			selectorLabels: promLabels(name),
			expectedErr:    true,
		},
		{
			// The zero-value service never matches the requested name.
			name:           "custom svc doesn't exist",
			selectorLabels: promLabels(name),
			expectedErr:    true,
		},
	}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			p := makeBarebonesPrometheus(name, ns)
			p.Spec.ServiceName = &serviceName

			clientSet := fake.NewSimpleClientset(&tc.service)
			svcClient := clientSet.CoreV1().Services(ns)

			err := EnsureCustomGoverningService(context.Background(), p.Namespace, *p.Spec.ServiceName, svcClient, tc.selectorLabels)
			if tc.expectedErr {
				require.Error(t, err)
			} else {
				require.NoError(t, err)
			}
		})
	}
}
// makeBarebonesPrometheus returns a Prometheus object with the given name and
// namespace, an empty annotation map and a single replica.
func makeBarebonesPrometheus(name, ns string) *monitoringv1.Prometheus {
	var prom monitoringv1.Prometheus

	prom.ObjectMeta = metav1.ObjectMeta{
		Name:        name,
		Namespace:   ns,
		Annotations: map[string]string{},
	}
	prom.Spec.CommonPrometheusFields.Replicas = ptr.To(int32(1))

	return &prom
}

View file

@ -801,7 +801,7 @@ func (c *Operator) sync(ctx context.Context, key string) error {
svcClient := c.kclient.CoreV1().Services(p.Namespace)
selectorLabels := makeSelectorLabels(p.Name)
if err := prompkg.EnsureCustomGoverningService(ctx, p.Namespace, *p.Spec.ServiceName, svcClient, selectorLabels); err != nil {
if err := k8sutil.EnsureCustomGoverningService(ctx, p.Namespace, *p.Spec.ServiceName, svcClient, selectorLabels); err != nil {
return err
}
} else {

View file

@ -333,7 +333,7 @@ func makeStatefulSetSpec(
}
spec := appsv1.StatefulSetSpec{
ServiceName: governingServiceName,
ServiceName: ptr.Deref(cpf.ServiceName, governingServiceName),
Replicas: cpf.Replicas,
// PodManagementPolicy is set to Parallel to mitigate issues in kubernetes: https://github.com/kubernetes/kubernetes/issues/60164
// This is also mentioned as one of limitations of StatefulSets: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#limitations

View file

@ -477,10 +477,17 @@ func (o *Operator) sync(ctx context.Context, key string) error {
return fmt.Errorf("failed to synchronize web config secret: %w", err)
}
// Create governing service if it doesn't exist.
svcClient := o.kclient.CoreV1().Services(tr.Namespace)
if _, err = k8sutil.CreateOrUpdateService(ctx, svcClient, makeStatefulSetService(tr, o.config)); err != nil {
return fmt.Errorf("synchronizing governing service failed: %w", err)
if tr.Spec.ServiceName != nil {
selectorLabels := makeSelectorLabels(tr.Name)
if err := k8sutil.EnsureCustomGoverningService(ctx, tr.Namespace, *tr.Spec.ServiceName, svcClient, selectorLabels); err != nil {
return err
}
} else {
// Create governing service if it doesn't exist.
if _, err = k8sutil.CreateOrUpdateService(ctx, svcClient, makeStatefulSetService(tr, o.config)); err != nil {
return fmt.Errorf("synchronizing governing service failed: %w", err)
}
}
// Ensure we have a StatefulSet running Thanos deployed.
@ -794,3 +801,16 @@ func newTLSAssetSecret(tr *monitoringv1.ThanosRuler, config Config) *v1.Secret {
return s
}
// makeSelectorLabels returns the labels used to select the pods belonging to
// the ThanosRuler resource called name. A new map is returned on every call.
//
// If an existing selector label is modified, or a new one is added, the new
// StatefulSet cannot match the existing pods. Removing or changing such
// immutable fields forces a 'recreate cycle' and can potentially lead to
// downtime, so any change to this set should be carefully evaluated.
func makeSelectorLabels(name string) map[string]string {
	labels := make(map[string]string, 4)

	labels["app.kubernetes.io/name"] = "thanos-ruler"
	labels["app.kubernetes.io/managed-by"] = "prometheus-operator"

	// Both of the following labels carry the resource's name.
	labels["app.kubernetes.io/instance"] = name
	labels["thanos-ruler"] = name

	return labels
}

View file

@ -17,6 +17,7 @@ package thanos
import (
"errors"
"fmt"
"maps"
"net/url"
"path"
"path/filepath"
@ -378,11 +379,10 @@ func makeStatefulSetSpec(tr *monitoringv1.ThanosRuler, config Config, ruleConfig
// We should try to avoid removing such immutable fields whenever possible since doing
// so forces us to enter the 'recreate cycle' and can potentially lead to downtime.
// The requirement to make a change here should be carefully evaluated.
podLabels["app.kubernetes.io/name"] = thanosRulerLabel
podLabels["app.kubernetes.io/managed-by"] = "prometheus-operator"
podLabels["app.kubernetes.io/instance"] = tr.Name
podLabels[thanosRulerLabel] = tr.Name
selectorLabels := makeSelectorLabels(tr.Name)
finalLabels := config.Labels.Merge(podLabels)
maps.Copy(finalLabels, selectorLabels)
podAnnotations["kubectl.kubernetes.io/default-container"] = "thanos-ruler"
@ -448,7 +448,7 @@ func makeStatefulSetSpec(tr *monitoringv1.ThanosRuler, config Config, ruleConfig
}
spec := appsv1.StatefulSetSpec{
ServiceName: governingServiceName,
ServiceName: ptr.Deref(tr.Spec.ServiceName, governingServiceName),
Replicas: tr.Spec.Replicas,
MinReadySeconds: minReadySeconds,
// PodManagementPolicy is set to Parallel to mitigate issues in kubernetes: https://github.com/kubernetes/kubernetes/issues/60164

View file

@ -2622,3 +2622,51 @@ templates: []
err = framework.DeleteAlertmanagerAndWaitUntilGone(context.Background(), ns, amName)
require.NoError(t, err)
}
// testAlertManagerServiceName verifies that an Alertmanager resource whose
// spec.serviceName references a user-provided Service becomes ready, and that
// the user-provided Service is the only Service present in the namespace
// afterwards (i.e. the operator did not create its default governing service).
func testAlertManagerServiceName(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	ns := framework.CreateNamespace(ctx, t, testCtx)
	name := "test-servicename"

	// The custom governing service is created before the Alertmanager resource
	// and its selector mirrors the labels the operator sets on the pods.
	svc := &v1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("%s-service", name),
			Namespace: ns,
		},
		Spec: v1.ServiceSpec{
			Type: v1.ServiceTypeLoadBalancer,
			Ports: []v1.ServicePort{
				{
					Name: "web",
					Port: 9090,
				},
			},
			Selector: map[string]string{
				"app.kubernetes.io/name":       "alertmanager",
				"app.kubernetes.io/managed-by": "prometheus-operator",
				"app.kubernetes.io/instance":   name,
				"alertmanager":                 name,
			},
		},
	}

	_, err := framework.KubeClient.CoreV1().Services(ns).Create(ctx, svc, metav1.CreateOptions{})
	require.NoError(t, err)

	framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)

	am := framework.MakeBasicAlertmanager(ns, name, 1)
	am.Spec.ServiceName = &svc.Name

	// Reuse the test's context instead of creating a second background context.
	_, err = framework.CreateAlertmanagerAndWaitUntilReady(ctx, am)
	require.NoError(t, err)

	// Ensure that the default governing service was not created by the operator.
	svcList, err := framework.KubeClient.CoreV1().Services(ns).List(ctx, metav1.ListOptions{})
	require.NoError(t, err)
	require.Len(t, svcList.Items, 1)
	// testify expects (expected, actual); this order keeps failure output accurate.
	require.Equal(t, svc.Name, svcList.Items[0].Name)
}

View file

@ -240,6 +240,7 @@ func testAllNSAlertmanager(t *testing.T) {
"AMWeb": testAMWeb,
"AMTemplateReloadConfig": testAMTmplateReloadConfig,
"AMStatusScale": testAlertmanagerStatusScale,
"AMServiceName": testAlertManagerServiceName,
}
for name, f := range testFuncs {
@ -324,6 +325,7 @@ func testAllNSThanosRuler(t *testing.T) {
"ThanosRulerAlertmanagerConfig": testTRAlertmanagerConfig,
"ThanosRulerQueryConfig": testTRQueryConfig,
"ThanosRulerCheckStorageClass": testTRCheckStorageClass,
"ThanosRulerServiceName": testThanosRulerServiceName,
}
for name, f := range testFuncs {
t.Run(name, f)

View file

@ -511,3 +511,51 @@ func testTRCheckStorageClass(t *testing.T) {
t.Fatalf("%v: %v", err, loopError)
}
}
// testThanosRulerServiceName verifies that a ThanosRuler resource whose
// spec.serviceName references a user-provided Service becomes ready, and that
// the user-provided Service is the only Service present in the namespace
// afterwards (i.e. the operator did not create its default governing service).
func testThanosRulerServiceName(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	ns := framework.CreateNamespace(ctx, t, testCtx)
	name := "test-servicename"

	// The custom governing service is created before the ThanosRuler resource
	// and its selector mirrors the labels the operator sets on the pods.
	svc := &v1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("%s-service", name),
			Namespace: ns,
		},
		Spec: v1.ServiceSpec{
			Type: v1.ServiceTypeLoadBalancer,
			Ports: []v1.ServicePort{
				{
					Name: "web",
					Port: 9090,
				},
			},
			Selector: map[string]string{
				"app.kubernetes.io/name":       "thanos-ruler",
				"app.kubernetes.io/managed-by": "prometheus-operator",
				"app.kubernetes.io/instance":   name,
				"thanos-ruler":                 name,
			},
		},
	}

	_, err := framework.KubeClient.CoreV1().Services(ns).Create(ctx, svc, metav1.CreateOptions{})
	require.NoError(t, err)

	framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)

	tr := framework.MakeBasicThanosRuler(name, 1, "http://test.example.com")
	tr.Spec.ServiceName = &svc.Name

	_, err = framework.CreateThanosRulerAndWaitUntilReady(ctx, ns, tr)
	require.NoError(t, err)

	// Ensure that the default governing service was not created by the operator.
	svcList, err := framework.KubeClient.CoreV1().Services(ns).List(ctx, metav1.ListOptions{})
	require.NoError(t, err)
	require.Len(t, svcList.Items, 1)
	// testify expects (expected, actual); this order keeps failure output accurate.
	require.Equal(t, svc.Name, svcList.Items[0].Name)
}