mirror of
https://github.com/prometheus-operator/prometheus-operator.git
synced 2025-04-16 01:06:27 +00:00
* Introduce PrometheusAgent CRD Operator is able to run with PrometheusAgent resources in the cluster, but doesn't do anything with them yet. This is the first step to implement the Prometheus Agent Operator. Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> * Re-enable configmap and secret informers Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit 1a71db03db6b41cd0cee9d0193b6ea3884bb5bae) * Implement Resolve for Agent operator Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit 49558165b9178b6c1bda833a48f7bfe1468c942a) * Operator is able to create Agent Statefulset Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit 7a3826683c92f917312c866a2bb6401dc54b95f2) * Agent Operator creates secret from ServiceMonitors Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit 11232669befb4de9d0765dfadfe5fae00b575f11) * Agent Operator creates secret from PodMonitors Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit 5ae551734bac2babc056c86443d15729d43d12b0) * Agent Operator creates secret from Probes Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit 9637612fbbe9617335fd6188271ebf2cc74a3693) * Agent Operator configures remote-write Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit c4bdf230d527e19f8b77ca5f938b9254ed344f7d) * Agent Operator configures additionalScrapeConfigs Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit d9f28db764641e682bf4fe8963310f791979c387) * Implement UpdateStatus Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit c546ecaf3e8b73916df44a8f48b279c6988e32f5) * Add resource handlers Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit 5b83359445e20f88ea5fff80302fce62d58058b9) * make format 
Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> (cherry picked from commit 6507964ba28f4ebf32ce3203db752444e288c45d) * Only start agent operator if there is enough permission Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> * Remove node endpoint syncronization from agent operator The server operator already handles it Signed-off-by: ArthurSens <arthursens2005@gmail.com> * Move PrometheusAgent API from v1 to v1alpha1 Signed-off-by: ArthurSens <arthursens2005@gmail.com> * pkg/prometheus/agent/statefulset.go: Fix image concatenation Signed-off-by: ArthurSens <arthursens2005@gmail.com> * Avoid name colisions between Prometheus Agents and Servers Signed-off-by: ArthurSens <arthursens2005@gmail.com> * agent/createOrUpdateConfigurationSecret: Do not handle case where servicemonitor and podmonitor selectors are empty Signed-off-by: ArthurSens <arthursens2005@gmail.com> * make format Signed-off-by: ArthurSens <arthursens2005@gmail.com> * make --always-make format generate Signed-off-by: ArthurSens <arthursens2005@gmail.com> * Remove unused fields from Operator struct Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> * Add deployment mode as new selector label for agent/server ssts Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> * WIP: Fix OperatorUpgrade e2e test Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> * Panic if type casting PrometheusInterface doesn't return Prometheus/Agent Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> * Detect whether PrometheusAgent CRD is installed or not If the operator's service account has all permissions on the cluster and the CRD isn't installed then the PrometheusAgent controller will run but fail because of the absence of the CRD. 
Signed-off-by: Simon Pasquier <spasquie@redhat.com> * Create dedicated governing service for Prometheus agent Signed-off-by: Simon Pasquier <spasquie@redhat.com> --------- Signed-off-by: Arthur Silva Sens <arthursens2005@gmail.com> Signed-off-by: ArthurSens <arthursens2005@gmail.com> Signed-off-by: Simon Pasquier <spasquie@redhat.com> Co-authored-by: Simon Pasquier <spasquie@redhat.com>
400 lines
15 KiB
Go
400 lines
15 KiB
Go
// Copyright 2016 The prometheus-operator Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package e2e
|
|
|
|
import (
|
|
"context"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"os"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/blang/semver/v4"
|
|
operatorFramework "github.com/prometheus-operator/prometheus-operator/test/framework"
|
|
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/fields"
|
|
)
|
|
|
|
var (
|
|
previousVersionFramework *operatorFramework.Framework
|
|
framework *operatorFramework.Framework
|
|
opImage *string
|
|
)
|
|
|
|
// skipPrometheusAllNSTests skips the calling test when the
// EXCLUDE_PROMETHEUS_ALL_NS_TESTS environment variable is set to a non-empty
// value.
func skipPrometheusAllNSTests(t *testing.T) {
	// Attribute the skip message to the calling test, not to this helper.
	t.Helper()

	if os.Getenv("EXCLUDE_PROMETHEUS_ALL_NS_TESTS") != "" {
		t.Skip("Skipping Prometheus all namespace tests")
	}
}
|
|
|
|
// skipPrometheusTests skips the calling test when the
// EXCLUDE_PROMETHEUS_TESTS environment variable is set to a non-empty value.
func skipPrometheusTests(t *testing.T) {
	// Attribute the skip message to the calling test, not to this helper.
	t.Helper()

	if os.Getenv("EXCLUDE_PROMETHEUS_TESTS") != "" {
		t.Skip("Skipping Prometheus tests")
	}
}
|
|
|
|
// skipAlertmanagerTests skips the calling test when the
// EXCLUDE_ALERTMANAGER_TESTS environment variable is set to a non-empty value.
func skipAlertmanagerTests(t *testing.T) {
	// Attribute the skip message to the calling test, not to this helper.
	t.Helper()

	if os.Getenv("EXCLUDE_ALERTMANAGER_TESTS") != "" {
		t.Skip("Skipping Alertmanager tests")
	}
}
|
|
|
|
// skipThanosRulerTests skips the calling test when the
// EXCLUDE_THANOSRULER_TESTS environment variable is set to a non-empty value.
func skipThanosRulerTests(t *testing.T) {
	// Attribute the skip message to the calling test, not to this helper.
	t.Helper()

	if os.Getenv("EXCLUDE_THANOSRULER_TESTS") != "" {
		t.Skip("Skipping ThanosRuler tests")
	}
}
|
|
|
|
// skipOperatorUpgradeTests skips the calling test when the
// EXCLUDE_OPERATOR_UPGRADE_TESTS environment variable is set to a non-empty
// value.
func skipOperatorUpgradeTests(t *testing.T) {
	// Attribute the skip message to the calling test, not to this helper.
	t.Helper()

	if os.Getenv("EXCLUDE_OPERATOR_UPGRADE_TESTS") != "" {
		t.Skip("Skipping Operator upgrade tests")
	}
}
|
|
|
|
// runFeatureGatedTests lets the calling test proceed only when the
// FEATURE_GATED_TESTS environment variable is exactly "include"; for any other
// value (including unset) the test is skipped. Feature-gated tests therefore
// have to be opted into explicitly.
func runFeatureGatedTests(t *testing.T) {
	// Attribute the skip message to the calling test, not to this helper.
	t.Helper()

	if os.Getenv("FEATURE_GATED_TESTS") != "include" {
		t.Skip("Skipping Feature Gated tests")
	}
}
|
|
|
|
func TestMain(m *testing.M) {
|
|
kubeconfig := flag.String(
|
|
"kubeconfig",
|
|
"",
|
|
"kube config path, e.g. $HOME/.kube/config",
|
|
)
|
|
opImage = flag.String(
|
|
"operator-image",
|
|
"",
|
|
"operator image, e.g. quay.io/prometheus-operator/prometheus-operator",
|
|
)
|
|
flag.Parse()
|
|
|
|
var (
|
|
err error
|
|
exitCode int
|
|
)
|
|
|
|
logger := log.New(os.Stdout, "", log.Lshortfile)
|
|
|
|
currentVersion, err := os.ReadFile("../../VERSION")
|
|
if err != nil {
|
|
logger.Printf("failed to read version file: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
currentSemVer, err := semver.ParseTolerant(string(currentVersion))
|
|
if err != nil {
|
|
logger.Printf("failed to parse current version: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
prevStableVersionURL := fmt.Sprintf("https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/release-%d.%d/VERSION", currentSemVer.Major, currentSemVer.Minor-1)
|
|
reader, err := operatorFramework.URLToIOReader(prevStableVersionURL)
|
|
if err != nil {
|
|
logger.Printf("failed to get previous version file content: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
prevStableVersion, err := io.ReadAll(reader)
|
|
if err != nil {
|
|
logger.Printf("failed to read previous stable version: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
prometheusOperatorGithubBranchURL := "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator"
|
|
|
|
prevSemVer, err := semver.ParseTolerant(string(prevStableVersion))
|
|
if err != nil {
|
|
logger.Printf("failed to parse previous stable version: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
prevStableOpImage := fmt.Sprintf("%s:v%s", "quay.io/prometheus-operator/prometheus-operator", strings.TrimSpace(string(prevStableVersion)))
|
|
prevExampleDir := fmt.Sprintf("%s/release-%d.%d/example", prometheusOperatorGithubBranchURL, prevSemVer.Major, prevSemVer.Minor)
|
|
prevResourcesDir := fmt.Sprintf("%s/release-%d.%d/test/framework/resources", prometheusOperatorGithubBranchURL, prevSemVer.Major, prevSemVer.Minor)
|
|
|
|
if previousVersionFramework, err = operatorFramework.New(*kubeconfig, prevStableOpImage, prevExampleDir, prevResourcesDir, prevSemVer); err != nil {
|
|
logger.Printf("failed to setup previous version framework: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
exampleDir := "../../example"
|
|
resourcesDir := "../framework/resources"
|
|
|
|
nextSemVer, err := semver.ParseTolerant(fmt.Sprintf("0.%d.0", currentSemVer.Minor))
|
|
if err != nil {
|
|
logger.Printf("failed to parse next version: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// init with next minor version since we are developing toward it.
|
|
if framework, err = operatorFramework.New(*kubeconfig, *opImage, exampleDir, resourcesDir, nextSemVer); err != nil {
|
|
logger.Printf("failed to setup framework: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
exitCode = m.Run()
|
|
|
|
os.Exit(exitCode)
|
|
}
|
|
|
|
// TestAllNS tests the Prometheus Operator watching all namespaces in a
|
|
// Kubernetes cluster.
|
|
func TestAllNS(t *testing.T) {
|
|
testCtx := framework.NewTestCtx(t)
|
|
defer testCtx.Cleanup(t)
|
|
|
|
ns := framework.CreateNamespace(context.Background(), t, testCtx)
|
|
|
|
finalizers, err := framework.CreateOrUpdatePrometheusOperator(context.Background(), ns, nil, nil, nil, nil, true, true, true)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
for _, f := range finalizers {
|
|
testCtx.AddFinalizerFn(f)
|
|
}
|
|
|
|
t.Run("TestServerTLS", testServerTLS(context.Background(), t, ns))
|
|
|
|
// t.Run blocks until the function passed as the second argument (f) returns or
|
|
// calls t.Parallel to become a parallel test. Run reports whether f succeeded
|
|
// (or at least did not fail before calling t.Parallel). As all tests in
|
|
// testAllNS are parallel, the deferred ctx.Cleanup above would be run before
|
|
// all tests finished. Wrapping it in testAllNSPrometheus and testAllNSAlertmanager
|
|
// fixes this.
|
|
t.Run("x", testAllNSAlertmanager)
|
|
t.Run("y", testAllNSPrometheus)
|
|
t.Run("z", testAllNSThanosRuler)
|
|
|
|
// Check if Prometheus Operator ever restarted.
|
|
opts := metav1.ListOptions{LabelSelector: fields.SelectorFromSet(fields.Set(map[string]string{
|
|
"app.kubernetes.io/name": "prometheus-operator",
|
|
})).String()}
|
|
|
|
pl, err := framework.KubeClient.CoreV1().Pods(ns).List(context.Background(), opts)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if expected := 1; len(pl.Items) != expected {
|
|
t.Fatalf("expected %v Prometheus Operator pods, but got %v", expected, len(pl.Items))
|
|
}
|
|
restarts, err := framework.GetPodRestartCount(context.Background(), ns, pl.Items[0].GetName())
|
|
if err != nil {
|
|
t.Fatalf("failed to retrieve restart count of Prometheus Operator pod: %v", err)
|
|
}
|
|
if len(restarts) != 1 {
|
|
t.Fatalf("expected to have 1 container but got %d", len(restarts))
|
|
}
|
|
for _, restart := range restarts {
|
|
if restart != 0 {
|
|
t.Fatalf(
|
|
"expected Prometheus Operator to never restart during entire test execution but got %d restarts",
|
|
restart,
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
func testAllNSAlertmanager(t *testing.T) {
|
|
skipAlertmanagerTests(t)
|
|
testFuncs := map[string]func(t *testing.T){
|
|
"AlertmanagerCRD": testAlertmanagerCRDValidation,
|
|
"AMCreateDeleteCluster": testAMCreateDeleteCluster,
|
|
"AMScaling": testAMScaling,
|
|
"AMVersionMigration": testAMVersionMigration,
|
|
"AMStorageUpdate": testAMStorageUpdate,
|
|
"AMExposingWithKubernetesAPI": testAMExposingWithKubernetesAPI,
|
|
"AMClusterInitialization": testAMClusterInitialization,
|
|
"AMClusterAfterRollingUpdate": testAMClusterAfterRollingUpdate,
|
|
"AMClusterGossipSilences": testAMClusterGossipSilences,
|
|
"AMReloadConfig": testAMReloadConfig,
|
|
"AMZeroDowntimeRollingDeployment": testAMZeroDowntimeRollingDeployment,
|
|
"AMAlertmanagerConfigCRD": testAlertmanagerConfigCRD,
|
|
"AMAlertmanagerConfigVersions": testAlertmanagerConfigVersions,
|
|
"AMUserDefinedAMConfigFromSecret": testUserDefinedAlertmanagerConfigFromSecret,
|
|
"AMUserDefinedAMConfigFromCustomResource": testUserDefinedAlertmanagerConfigFromCustomResource,
|
|
"AMPreserveUserAddedMetadata": testAMPreserveUserAddedMetadata,
|
|
"AMRollbackManualChanges": testAMRollbackManualChanges,
|
|
"AMMinReadySeconds": testAlertManagerMinReadySeconds,
|
|
"AMWeb": testAMWeb,
|
|
}
|
|
|
|
for name, f := range testFuncs {
|
|
t.Run(name, f)
|
|
}
|
|
}
|
|
|
|
func testAllNSPrometheus(t *testing.T) {
|
|
skipPrometheusAllNSTests(t)
|
|
testFuncs := map[string]func(t *testing.T){
|
|
"PrometheusCRDValidation": testPrometheusCRDValidation,
|
|
"PromRemoteWriteWithTLS": testPromRemoteWriteWithTLS,
|
|
"PromCreateDeleteCluster": testPromCreateDeleteCluster,
|
|
"PromScaleUpDownCluster": testPromScaleUpDownCluster,
|
|
"PromNoServiceMonitorSelector": testPromNoServiceMonitorSelector,
|
|
"PromVersionMigration": testPromVersionMigration,
|
|
"PromResourceUpdate": testPromResourceUpdate,
|
|
"PromStorageLabelsAnnotations": testPromStorageLabelsAnnotations,
|
|
"PromStorageUpdate": testPromStorageUpdate,
|
|
"PromReloadConfig": testPromReloadConfig,
|
|
"PromAdditionalScrapeConfig": testPromAdditionalScrapeConfig,
|
|
"PromAdditionalAlertManagerConfig": testPromAdditionalAlertManagerConfig,
|
|
"PromReloadRules": testPromReloadRules,
|
|
"PromMultiplePrometheusRulesSameNS": testPromMultiplePrometheusRulesSameNS,
|
|
"PromMultiplePrometheusRulesDifferentNS": testPromMultiplePrometheusRulesDifferentNS,
|
|
"PromRulesExceedingConfigMapLimit": testPromRulesExceedingConfigMapLimit,
|
|
"PromRulesMustBeAnnotated": testPromRulesMustBeAnnotated,
|
|
"PromtestInvalidRulesAreRejected": testInvalidRulesAreRejected,
|
|
"PromOnlyUpdatedOnRelevantChanges": testPromOnlyUpdatedOnRelevantChanges,
|
|
"PromWhenDeleteCRDCleanUpViaOwnerRef": testPromWhenDeleteCRDCleanUpViaOwnerRef,
|
|
"PromDiscovery": testPromDiscovery,
|
|
"ShardingProvisioning": testShardingProvisioning,
|
|
"Resharding": testResharding,
|
|
"PromAlertmanagerDiscovery": testPromAlertmanagerDiscovery,
|
|
"PromExposingWithKubernetesAPI": testPromExposingWithKubernetesAPI,
|
|
"PromDiscoverTargetPort": testPromDiscoverTargetPort,
|
|
"PromOpMatchPromAndServMonInDiffNSs": testPromOpMatchPromAndServMonInDiffNSs,
|
|
"PromGetAuthSecret": testPromGetAuthSecret,
|
|
"PromArbitraryFSAcc": testPromArbitraryFSAcc,
|
|
"PromTLSConfigViaSecret": testPromTLSConfigViaSecret,
|
|
"Thanos": testThanos,
|
|
"PromStaticProbe": testPromStaticProbe,
|
|
"PromSecurePodMonitor": testPromSecurePodMonitor,
|
|
"PromSharedResourcesReconciliation": testPromSharedResourcesReconciliation,
|
|
"PromPreserveUserAddedMetadata": testPromPreserveUserAddedMetadata,
|
|
"PromWebWithThanosSidecar": testPromWebWithThanosSidecar,
|
|
"PromMinReadySeconds": testPromMinReadySeconds,
|
|
"PromEnforcedNamespaceLabel": testPromEnforcedNamespaceLabel,
|
|
"PromNamespaceEnforcementExclusion": testPromNamespaceEnforcementExclusion,
|
|
"PromQueryLogFile": testPromQueryLogFile,
|
|
"PromDegradedCondition": testPromDegradedConditionStatus,
|
|
"PromUnavailableCondition": testPromUnavailableConditionStatus,
|
|
"PromStrategicMergePatch": testPromStrategicMergePatch,
|
|
"RelabelConfigCRDValidation": testRelabelConfigCRDValidation,
|
|
"PromReconcileStatusWhenInvalidRuleCreated": testPromReconcileStatusWhenInvalidRuleCreated,
|
|
}
|
|
|
|
for name, f := range testFuncs {
|
|
t.Run(name, f)
|
|
}
|
|
}
|
|
|
|
func testAllNSThanosRuler(t *testing.T) {
|
|
skipThanosRulerTests(t)
|
|
testFuncs := map[string]func(t *testing.T){
|
|
"ThanosRulerCreateDeleteCluster": testThanosRulerCreateDeleteCluster,
|
|
"ThanosRulerPrometheusRuleInDifferentNamespace": testThanosRulerPrometheusRuleInDifferentNamespace,
|
|
"ThanosRulerPreserveUserAddedMetadata": testTRPreserveUserAddedMetadata,
|
|
"ThanosRulerMinReadySeconds": testTRMinReadySeconds,
|
|
"ThanosRulerAlertmanagerConfig": testTRAlertmanagerConfig,
|
|
"ThanosRulerQueryConfig": testTRQueryConfig,
|
|
}
|
|
for name, f := range testFuncs {
|
|
t.Run(name, f)
|
|
}
|
|
}
|
|
|
|
// TestMultiNS tests the Prometheus Operator configured to watch specific
|
|
// namespaces.
|
|
func TestMultiNS(t *testing.T) {
|
|
skipPrometheusTests(t)
|
|
testFuncs := map[string]func(t *testing.T){
|
|
"OperatorNSScope": testOperatorNSScope,
|
|
}
|
|
|
|
for name, f := range testFuncs {
|
|
t.Run(name, f)
|
|
}
|
|
}
|
|
|
|
// TestDenylist tests the Prometheus Operator configured not to watch specific namespaces.
|
|
func TestDenylist(t *testing.T) {
|
|
skipPrometheusTests(t)
|
|
testFuncs := map[string]func(t *testing.T){
|
|
"Prometheus": testDenyPrometheus,
|
|
"ServiceMonitor": testDenyServiceMonitor,
|
|
"ThanosRuler": testDenyThanosRuler,
|
|
}
|
|
|
|
for name, f := range testFuncs {
|
|
t.Run(name, f)
|
|
}
|
|
}
|
|
|
|
// TestPromInstanceNs tests prometheus operator in different scenarios when --prometheus-instance-namespace is given
|
|
func TestPromInstanceNs(t *testing.T) {
|
|
skipPrometheusTests(t)
|
|
testFuncs := map[string]func(t *testing.T){
|
|
"AllNs": testPrometheusInstanceNamespacesAllNs,
|
|
"AllowList": testPrometheusInstanceNamespacesAllowList,
|
|
"DenyList": testPrometheusInstanceNamespacesDenyList,
|
|
"NamespaceNotFound": testPrometheusInstanceNamespacesNamespaceNotFound,
|
|
}
|
|
|
|
for name, f := range testFuncs {
|
|
t.Run(name, f)
|
|
}
|
|
}
|
|
|
|
// TestAlertmanagerInstanceNs tests prometheus operator in different scenarios when --alertmanager-instance-namespace is given
|
|
func TestAlertmanagerInstanceNs(t *testing.T) {
|
|
skipAlertmanagerTests(t)
|
|
testFuncs := map[string]func(t *testing.T){
|
|
"AllNs": testAlertmanagerInstanceNamespacesAllNs,
|
|
"AllowList": testAlertmanagerInstanceNamespacesAllowList,
|
|
"DenyNs": testAlertmanagerInstanceNamespacesDenyNs,
|
|
}
|
|
|
|
for name, f := range testFuncs {
|
|
t.Run(name, f)
|
|
}
|
|
}
|
|
|
|
// TestOperatorUpgrade tests the prometheus upgrade from previous stable minor version to current version
|
|
func TestOperatorUpgrade(t *testing.T) {
|
|
skipOperatorUpgradeTests(t)
|
|
testFuncs := map[string]func(t *testing.T){
|
|
"OperatorUpgrade": testOperatorUpgrade,
|
|
}
|
|
|
|
for name, f := range testFuncs {
|
|
t.Run(name, f)
|
|
}
|
|
}
|
|
|
|
const (
|
|
prometheusOperatorServiceName = "prometheus-operator"
|
|
)
|
|
|
|
func testServerTLS(ctx context.Context, t *testing.T, namespace string) func(t *testing.T) {
|
|
return func(t *testing.T) {
|
|
skipPrometheusTests(t)
|
|
if err := framework.WaitForServiceReady(context.Background(), namespace, prometheusOperatorServiceName); err != nil {
|
|
t.Fatal("waiting for prometheus operator service: ", err)
|
|
}
|
|
|
|
operatorService := framework.KubeClient.CoreV1().Services(namespace)
|
|
request := operatorService.ProxyGet("https", prometheusOperatorServiceName, "https", "/healthz", make(map[string]string))
|
|
_, err := request.DoRaw(ctx)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
}
|
|
}
|