Mirror of https://github.com/prometheus-operator/prometheus-operator.git (synced 2025-04-21 03:38:43 +00:00)
Merge pull request #7274 from mviswanathsai/shard-autoretention-featuregate
Add ShardRetentionPolicy to Prometheus API
This commit is contained in: parent 021a23526b, commit 9bc6e5e7db
16 changed files with 354 additions and 3 deletions
Changed paths:
Documentation
bundle.yaml
example/prometheus-operator-crd-full
example/prometheus-operator-crd
jsonnet/prometheus-operator
pkg/apis/monitoring/v1
pkg/client/applyconfiguration
pkg/operator
pkg/prometheus/server
test/e2e
@@ -3228,6 +3228,24 @@ ByteSize
</tr>
<tr>
<td>
<code>shardRetentionPolicy</code><br/>
<em>
<a href="#monitoring.coreos.com/v1.ShardRetentionPolicy">
ShardRetentionPolicy
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>ShardRetentionPolicy defines the retention policy for the Prometheus shards.
(Alpha) Using this field requires the ‘PrometheusShardRetentionPolicy’ feature gate to be enabled.</p>
<p>The final goals for this feature can be seen at <a href="https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/proposals/202310-shard-autoscaling.md#graceful-scale-down-of-prometheus-servers">https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/proposals/202310-shard-autoscaling.md#graceful-scale-down-of-prometheus-servers</a>,
however, the feature is not yet fully implemented in this PR. The limitation being:
* Retention duration is not settable, for now, shards are retained forever.</p>
</td>
</tr>
<tr>
<td>
<code>disableCompaction</code><br/>
<em>
bool

@@ -13336,6 +13354,24 @@ ByteSize
</tr>
<tr>
<td>
<code>shardRetentionPolicy</code><br/>
<em>
<a href="#monitoring.coreos.com/v1.ShardRetentionPolicy">
ShardRetentionPolicy
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>ShardRetentionPolicy defines the retention policy for the Prometheus shards.
(Alpha) Using this field requires the ‘PrometheusShardRetentionPolicy’ feature gate to be enabled.</p>
<p>The final goals for this feature can be seen at <a href="https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/proposals/202310-shard-autoscaling.md#graceful-scale-down-of-prometheus-servers">https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/proposals/202310-shard-autoscaling.md#graceful-scale-down-of-prometheus-servers</a>,
however, the feature is not yet fully implemented in this PR. The limitation being:
* Retention duration is not settable, for now, shards are retained forever.</p>
</td>
</tr>
<tr>
<td>
<code>disableCompaction</code><br/>
<em>
bool

@@ -16092,6 +16128,40 @@ of uncompressed response body that will be accepted by Prometheus.</p>
</tr>
</tbody>
</table>
<h3 id="monitoring.coreos.com/v1.ShardRetentionPolicy">ShardRetentionPolicy
</h3>
<p>
(<em>Appears on:</em><a href="#monitoring.coreos.com/v1.PrometheusSpec">PrometheusSpec</a>)
</p>
<div>
</div>
<table>
<thead>
<tr>
<th>Field</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<code>whenScaled</code><br/>
<em>
<a href="#monitoring.coreos.com/v1.WhenScaledRetentionType">
WhenScaledRetentionType
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>Defines the retention policy when the Prometheus shards are scaled down.
* <code>Delete</code>, the operator will delete the pods from the scaled-down shard(s).
* <code>Retain</code>, the operator will keep the pods from the scaled-down shard(s), so the data can still be queried.</p>
<p>If not defined, the operator assumes the <code>Delete</code> value.</p>
</td>
</tr>
</tbody>
</table>
<h3 id="monitoring.coreos.com/v1.ShardStatus">ShardStatus
</h3>
<p>

@@ -18431,6 +18501,13 @@ order.</p>
</tr>
</tbody>
</table>
<h3 id="monitoring.coreos.com/v1.WhenScaledRetentionType">WhenScaledRetentionType
(<code>string</code> alias)</h3>
<p>
(<em>Appears on:</em><a href="#monitoring.coreos.com/v1.ShardRetentionPolicy">ShardRetentionPolicy</a>)
</p>
<div>
</div>
<hr/>
<h2 id="monitoring.coreos.com/v1alpha1">monitoring.coreos.com/v1alpha1</h2>
Resource Types:
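For orientation, here is a minimal sketch (not part of this PR) of how the new field is set through the typed Go API. The object name, namespace and shard count below are illustrative:

```go
package main

import (
	"fmt"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/ptr"
)

func main() {
	// Illustrative only: a Prometheus object with two shards whose pods are
	// retained (not deleted) when the shard count is later scaled back down.
	p := &monitoringv1.Prometheus{
		ObjectMeta: metav1.ObjectMeta{Name: "example", Namespace: "monitoring"},
		Spec: monitoringv1.PrometheusSpec{
			CommonPrometheusFields: monitoringv1.CommonPrometheusFields{
				Shards: ptr.To(int32(2)),
			},
			ShardRetentionPolicy: &monitoringv1.ShardRetentionPolicy{
				WhenScaled: ptr.To(monitoringv1.RetainWhenScaledRetentionType),
			},
		},
	}
	fmt.Println(p.Name, *p.Spec.ShardRetentionPolicy.WhenScaled)
}
```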
@@ -57,6 +57,7 @@ Usage of ./operator:
	Feature gates are a set of key=value pairs that describe Prometheus-Operator features.
	Available feature gates:
	  PrometheusAgentDaemonSet: Enables the DaemonSet mode for PrometheusAgent (enabled: false)
	  PrometheusShardRetentionPolicy: Enables shard retention policy for Prometheus (enabled: false)
	  PrometheusTopologySharding: Enables the zone aware sharding for Prometheus (enabled: false)
  -key-file string
	- NOT RECOMMENDED FOR PRODUCTION - Path to private TLS certificate file.
bundle.yaml (generated, 21 changes)
@@ -41033,6 +41033,27 @@ spec:
            description: 'Deprecated: use ''spec.image'' instead. The image''s
              digest can be specified as part of the image name.'
            type: string
          shardRetentionPolicy:
            description: |-
              ShardRetentionPolicy defines the retention policy for the Prometheus shards.
              (Alpha) Using this field requires the 'PrometheusShardRetentionPolicy' feature gate to be enabled.

              The final goals for this feature can be seen at https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/proposals/202310-shard-autoscaling.md#graceful-scale-down-of-prometheus-servers,
              however, the feature is not yet fully implemented in this PR. The limitation being:
              * Retention duration is not settable, for now, shards are retained forever.
            properties:
              whenScaled:
                description: |-
                  Defines the retention policy when the Prometheus shards are scaled down.
                  * `Delete`, the operator will delete the pods from the scaled-down shard(s).
                  * `Retain`, the operator will keep the pods from the scaled-down shard(s), so the data can still be queried.

                  If not defined, the operator assumes the `Delete` value.
                enum:
                - Retain
                - Delete
                type: string
            type: object
          shards:
            description: |-
              Number of shards to distribute the scraped targets onto.
@@ -9030,6 +9030,27 @@ spec:
            description: 'Deprecated: use ''spec.image'' instead. The image''s
              digest can be specified as part of the image name.'
            type: string
          shardRetentionPolicy:
            description: |-
              ShardRetentionPolicy defines the retention policy for the Prometheus shards.
              (Alpha) Using this field requires the 'PrometheusShardRetentionPolicy' feature gate to be enabled.

              The final goals for this feature can be seen at https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/proposals/202310-shard-autoscaling.md#graceful-scale-down-of-prometheus-servers,
              however, the feature is not yet fully implemented in this PR. The limitation being:
              * Retention duration is not settable, for now, shards are retained forever.
            properties:
              whenScaled:
                description: |-
                  Defines the retention policy when the Prometheus shards are scaled down.
                  * `Delete`, the operator will delete the pods from the scaled-down shard(s).
                  * `Retain`, the operator will keep the pods from the scaled-down shard(s), so the data can still be queried.

                  If not defined, the operator assumes the `Delete` value.
                enum:
                - Retain
                - Delete
                type: string
            type: object
          shards:
            description: |-
              Number of shards to distribute the scraped targets onto.
@@ -9031,6 +9031,27 @@ spec:
            description: 'Deprecated: use ''spec.image'' instead. The image''s
              digest can be specified as part of the image name.'
            type: string
          shardRetentionPolicy:
            description: |-
              ShardRetentionPolicy defines the retention policy for the Prometheus shards.
              (Alpha) Using this field requires the 'PrometheusShardRetentionPolicy' feature gate to be enabled.

              The final goals for this feature can be seen at https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/proposals/202310-shard-autoscaling.md#graceful-scale-down-of-prometheus-servers,
              however, the feature is not yet fully implemented in this PR. The limitation being:
              * Retention duration is not settable, for now, shards are retained forever.
            properties:
              whenScaled:
                description: |-
                  Defines the retention policy when the Prometheus shards are scaled down.
                  * `Delete`, the operator will delete the pods from the scaled-down shard(s).
                  * `Retain`, the operator will keep the pods from the scaled-down shard(s), so the data can still be queried.

                  If not defined, the operator assumes the `Delete` value.
                enum:
                - Retain
                - Delete
                type: string
            type: object
          shards:
            description: |-
              Number of shards to distribute the scraped targets onto.
@@ -7688,6 +7688,20 @@
          "description": "Deprecated: use 'spec.image' instead. The image's digest can be specified as part of the image name.",
          "type": "string"
        },
        "shardRetentionPolicy": {
          "description": "ShardRetentionPolicy defines the retention policy for the Prometheus shards.\n(Alpha) Using this field requires the 'PrometheusShardRetentionPolicy' feature gate to be enabled.\n\nThe final goals for this feature can be seen at https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/proposals/202310-shard-autoscaling.md#graceful-scale-down-of-prometheus-servers,\nhowever, the feature is not yet fully implemented in this PR. The limitation being:\n* Retention duration is not settable, for now, shards are retained forever.",
          "properties": {
            "whenScaled": {
              "description": "Defines the retention policy when the Prometheus shards are scaled down.\n* `Delete`, the operator will delete the pods from the scaled-down shard(s).\n* `Retain`, the operator will keep the pods from the scaled-down shard(s), so the data can still be queried.\n\nIf not defined, the operator assumes the `Delete` value.",
              "enum": [
                "Retain",
                "Delete"
              ],
              "type": "string"
            }
          },
          "type": "object"
        },
        "shards": {
          "description": "Number of shards to distribute the scraped targets onto.\n\n`spec.replicas` multiplied by `spec.shards` is the total number of Pods\nbeing created.\n\nWhen not defined, the operator assumes only one shard.\n\nNote that scaling down shards will not reshard data onto the remaining\ninstances, it must be manually moved. Increasing shards will not reshard\ndata either but it will continue to be available from the same\ninstances. To query globally, use either\n* Thanos sidecar + querier for query federation and Thanos Ruler for rules.\n* Remote-write to send metrics to a central location.\n\nBy default, the sharding of targets is performed on:\n* The `__address__` target's metadata label for PodMonitor,\nServiceMonitor and ScrapeConfig resources.\n* The `__param_target__` label for Probe resources.\n\nUsers can define their own sharding implementation by setting the\n`__tmp_hash` label during the target discovery with relabeling\nconfiguration (either in the monitoring resources or via scrape class).",
          "format": "int32",
@@ -1009,6 +1009,16 @@ type PrometheusSpec struct {
	// Maximum number of bytes used by the Prometheus data.
	RetentionSize ByteSize `json:"retentionSize,omitempty"`

	// ShardRetentionPolicy defines the retention policy for the Prometheus shards.
	// (Alpha) Using this field requires the 'PrometheusShardRetentionPolicy' feature gate to be enabled.
	//
	// The final goals for this feature can be seen at https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/proposals/202310-shard-autoscaling.md#graceful-scale-down-of-prometheus-servers,
	// however, the feature is not yet fully implemented in this PR. The limitation being:
	// * Retention duration is not settable, for now, shards are retained forever.
	//
	// +optional
	ShardRetentionPolicy *ShardRetentionPolicy `json:"shardRetentionPolicy,omitempty"`

	// When true, the Prometheus compaction is disabled.
	// When `spec.thanos.objectStorageConfig` or `spec.objectStorageConfigFile` are defined, the operator automatically
	// disables block compaction to avoid race conditions during block uploads (as the Thanos documentation recommends).

@@ -1128,6 +1138,24 @@ type PrometheusSpec struct {
	EnableAdminAPI bool `json:"enableAdminAPI,omitempty"`
}

type WhenScaledRetentionType string

var (
	RetainWhenScaledRetentionType WhenScaledRetentionType = "Retain"
	DeleteWhenScaledRetentionType WhenScaledRetentionType = "Delete"
)

type ShardRetentionPolicy struct {
	// Defines the retention policy when the Prometheus shards are scaled down.
	// * `Delete`, the operator will delete the pods from the scaled-down shard(s).
	// * `Retain`, the operator will keep the pods from the scaled-down shard(s), so the data can still be queried.
	//
	// If not defined, the operator assumes the `Delete` value.
	// +kubebuilder:validation:Enum=Retain;Delete
	// +optional
	WhenScaled *WhenScaledRetentionType `json:"whenScaled,omitempty"`
}

type PrometheusTracingConfig struct {
	// Client used to export the traces. Supported values are `http` or `grpc`.
	// +kubebuilder:validation:Enum=http;grpc
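Since `whenScaled` is an optional pointer serialized with `omitempty`, here is a brief sketch, not from the PR, of how the field round-trips through JSON and how a consumer could resolve the documented `Delete` default (the `effectiveWhenScaled` helper is hypothetical):

```go
package main

import (
	"encoding/json"
	"fmt"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	"k8s.io/utils/ptr"
)

// effectiveWhenScaled is an illustrative helper: it returns the configured
// policy and falls back to `Delete` when the policy or the field is unset.
func effectiveWhenScaled(p *monitoringv1.ShardRetentionPolicy) monitoringv1.WhenScaledRetentionType {
	if p == nil {
		return monitoringv1.DeleteWhenScaledRetentionType
	}
	return ptr.Deref(p.WhenScaled, monitoringv1.DeleteWhenScaledRetentionType)
}

func main() {
	unset := &monitoringv1.ShardRetentionPolicy{}
	retain := &monitoringv1.ShardRetentionPolicy{
		WhenScaled: ptr.To(monitoringv1.RetainWhenScaledRetentionType),
	}

	for _, p := range []*monitoringv1.ShardRetentionPolicy{unset, retain} {
		b, _ := json.Marshal(p)
		// Prints `{}` then `{"whenScaled":"Retain"}`, with the effective policy for each.
		fmt.Printf("%s -> %s\n", b, effectiveWhenScaled(p))
	}
}
```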
pkg/apis/monitoring/v1/zz_generated.deepcopy.go (generated, 25 changes)
@@ -2274,6 +2274,11 @@ func (in *PrometheusRuleSpec) DeepCopy() *PrometheusRuleSpec {
func (in *PrometheusSpec) DeepCopyInto(out *PrometheusSpec) {
	*out = *in
	in.CommonPrometheusFields.DeepCopyInto(&out.CommonPrometheusFields)
	if in.ShardRetentionPolicy != nil {
		in, out := &in.ShardRetentionPolicy, &out.ShardRetentionPolicy
		*out = new(ShardRetentionPolicy)
		(*in).DeepCopyInto(*out)
	}
	out.Rules = in.Rules
	if in.PrometheusRulesExcludedFromEnforce != nil {
		in, out := &in.PrometheusRulesExcludedFromEnforce, &out.PrometheusRulesExcludedFromEnforce

@@ -3185,6 +3190,26 @@ func (in *ServiceMonitorSpec) DeepCopy() *ServiceMonitorSpec {
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ShardRetentionPolicy) DeepCopyInto(out *ShardRetentionPolicy) {
	*out = *in
	if in.WhenScaled != nil {
		in, out := &in.WhenScaled, &out.WhenScaled
		*out = new(WhenScaledRetentionType)
		**out = **in
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ShardRetentionPolicy.
func (in *ShardRetentionPolicy) DeepCopy() *ShardRetentionPolicy {
	if in == nil {
		return nil
	}
	out := new(ShardRetentionPolicy)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ShardStatus) DeepCopyInto(out *ShardStatus) {
	*out = *in
@@ -32,6 +32,7 @@ type PrometheusSpecApplyConfiguration struct {
	SHA                  *string                                 `json:"sha,omitempty"`
	Retention            *monitoringv1.Duration                  `json:"retention,omitempty"`
	RetentionSize        *monitoringv1.ByteSize                  `json:"retentionSize,omitempty"`
	ShardRetentionPolicy *ShardRetentionPolicyApplyConfiguration `json:"shardRetentionPolicy,omitempty"`
	DisableCompaction    *bool                                   `json:"disableCompaction,omitempty"`
	Rules                *RulesApplyConfiguration                `json:"rules,omitempty"`
	PrometheusRulesExcludedFromEnforce []PrometheusRuleExcludeConfigApplyConfiguration `json:"prometheusRulesExcludedFromEnforce,omitempty"`

@@ -899,6 +900,14 @@ func (b *PrometheusSpecApplyConfiguration) WithRetentionSize(value monitoringv1.
	return b
}

// WithShardRetentionPolicy sets the ShardRetentionPolicy field in the declarative configuration to the given value
// and returns the receiver, so that objects can be built by chaining "With" function invocations.
// If called multiple times, the ShardRetentionPolicy field is set to the value of the last call.
func (b *PrometheusSpecApplyConfiguration) WithShardRetentionPolicy(value *ShardRetentionPolicyApplyConfiguration) *PrometheusSpecApplyConfiguration {
	b.ShardRetentionPolicy = value
	return b
}

// WithDisableCompaction sets the DisableCompaction field in the declarative configuration to the given value
// and returns the receiver, so that objects can be built by chaining "With" function invocations.
// If called multiple times, the DisableCompaction field is set to the value of the last call.
@@ -0,0 +1,41 @@
// Copyright The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Code generated by applyconfiguration-gen. DO NOT EDIT.

package v1

import (
	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
)

// ShardRetentionPolicyApplyConfiguration represents a declarative configuration of the ShardRetentionPolicy type for use
// with apply.
type ShardRetentionPolicyApplyConfiguration struct {
	WhenScaled *monitoringv1.WhenScaledRetentionType `json:"whenScaled,omitempty"`
}

// ShardRetentionPolicyApplyConfiguration constructs a declarative configuration of the ShardRetentionPolicy type for use with
// apply.
func ShardRetentionPolicy() *ShardRetentionPolicyApplyConfiguration {
	return &ShardRetentionPolicyApplyConfiguration{}
}

// WithWhenScaled sets the WhenScaled field in the declarative configuration to the given value
// and returns the receiver, so that objects can be built by chaining "With" function invocations.
// If called multiple times, the WhenScaled field is set to the value of the last call.
func (b *ShardRetentionPolicyApplyConfiguration) WithWhenScaled(value monitoringv1.WhenScaledRetentionType) *ShardRetentionPolicyApplyConfiguration {
	b.WhenScaled = &value
	return b
}
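A sketch of how these generated builders chain together when composing a server-side apply patch fragment. The applyconfiguration import path is inferred from the package layout above, and the JSON marshaling is only for display; the actual client Apply call is omitted:

```go
package main

import (
	"encoding/json"
	"fmt"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	monitoringv1ac "github.com/prometheus-operator/prometheus-operator/pkg/client/applyconfiguration/monitoring/v1"
)

func main() {
	// Own only the field we care about in the apply configuration:
	// the new shard retention policy with whenScaled set to Retain.
	spec := &monitoringv1ac.PrometheusSpecApplyConfiguration{}
	spec.WithShardRetentionPolicy(
		monitoringv1ac.ShardRetentionPolicy().
			WithWhenScaled(monitoringv1.RetainWhenScaledRetentionType),
	)

	// Prints {"shardRetentionPolicy":{"whenScaled":"Retain"}}.
	b, _ := json.Marshal(spec)
	fmt.Println(string(b))
}
```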
@@ -178,6 +178,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} {
		return &monitoringv1.ServiceMonitorApplyConfiguration{}
	case v1.SchemeGroupVersion.WithKind("ServiceMonitorSpec"):
		return &monitoringv1.ServiceMonitorSpecApplyConfiguration{}
	case v1.SchemeGroupVersion.WithKind("ShardRetentionPolicy"):
		return &monitoringv1.ShardRetentionPolicyApplyConfiguration{}
	case v1.SchemeGroupVersion.WithKind("ShardStatus"):
		return &monitoringv1.ShardStatusApplyConfiguration{}
	case v1.SchemeGroupVersion.WithKind("Sigv4"):
@@ -98,6 +98,10 @@ func DefaultConfig(cpu, memory string) Config {
			description: "Enables the zone aware sharding for Prometheus",
			enabled:     false,
		},
		PrometheusShardRetentionPolicyFeature: FeatureGate{
			description: "Enables shard retention policy for Prometheus",
			enabled:     false,
		},
	},
	}
}
@@ -27,7 +27,8 @@ const (
	PrometheusAgentDaemonSetFeature FeatureGateName = "PrometheusAgentDaemonSet"

	// PrometheusTopologySharding enables the zone aware sharding for Prometheus.
	PrometheusTopologyShardingFeature FeatureGateName = "PrometheusTopologySharding"
	PrometheusTopologyShardingFeature     FeatureGateName = "PrometheusTopologySharding"
	PrometheusShardRetentionPolicyFeature FeatureGateName = "PrometheusShardRetentionPolicy"
)

type FeatureGateName string
@@ -91,6 +91,7 @@ type Operator struct {
	scrapeConfigSupported         bool
	canReadStorageClass           bool
	disableUnmanagedConfiguration bool
	retentionPoliciesEnabled      bool

	eventRecorder record.EventRecorder
}

@@ -167,8 +168,9 @@ func New(ctx context.Context, restConfig *rest.Config, c operator.Config, logger
		metrics:         operator.NewMetrics(r),
		reconciliations: &operator.ReconciliationTracker{},

		controllerID:  c.ControllerID,
		eventRecorder: c.EventRecorderFactory(client, controllerName),
		controllerID:             c.ControllerID,
		eventRecorder:            c.EventRecorderFactory(client, controllerName),
		retentionPoliciesEnabled: c.Gates.Enabled(operator.PrometheusShardRetentionPolicyFeature),
	}
	for _, opt := range opts {
		opt(o)

@@ -940,6 +942,15 @@ func (c *Operator) sync(ctx context.Context, key string) error {
			return
		}

		shouldRetain, err := c.shouldRetain(p)
		if err != nil {
			c.logger.Error("failed to determine if StatefulSet should be retained", "err", err, "name", s.GetName(), "namespace", s.GetNamespace())
			return
		}
		if shouldRetain {
			return
		}

		if err := ssetClient.Delete(ctx, s.GetName(), metav1.DeleteOptions{PropagationPolicy: ptr.To(metav1.DeletePropagationForeground)}); err != nil {
			c.logger.Error("failed to delete StatefulSet object", "err", err, "name", s.GetName(), "namespace", s.GetNamespace())
		}

@@ -951,6 +962,21 @@ func (c *Operator) sync(ctx context.Context, key string) error {
	return nil
}

// As the ShardRetentionPolicy feature evolves, should retain will evolve accordingly.
// For now, shouldRetain just returns the appropriate boolean based on the retention type.
func (c *Operator) shouldRetain(p *monitoringv1.Prometheus) (bool, error) {
	if !c.retentionPoliciesEnabled {
		// Feature-gate is disabled, default behavior is always to delete.
		return false, nil
	}
	if ptr.Deref(p.Spec.ShardRetentionPolicy.WhenScaled,
		monitoringv1.DeleteWhenScaledRetentionType) == monitoringv1.RetainWhenScaledRetentionType {
		return true, nil
	}

	return false, nil
}

// UpdateStatus updates the status subresource of the object identified by the given
// key.
// UpdateStatus implements the operator.Syncer interface.
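To make the decision above explicit, here is an illustrative, standalone restatement of the retention check (not the operator's own helper, and the function name is hypothetical): the policy is consulted only when the feature gate is on, and an unset policy or unset whenScaled falls back to Delete.

```go
package main

import (
	"fmt"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	"k8s.io/utils/ptr"
)

// retainOnScaleDown restates the decision as a free function for clarity;
// it is not part of the PR.
func retainOnScaleDown(gateEnabled bool, policy *monitoringv1.ShardRetentionPolicy) bool {
	if !gateEnabled || policy == nil {
		// With the feature gate off (or no policy set), scaled-down shards are deleted.
		return false
	}
	return ptr.Deref(policy.WhenScaled, monitoringv1.DeleteWhenScaledRetentionType) ==
		monitoringv1.RetainWhenScaledRetentionType
}

func main() {
	retain := &monitoringv1.ShardRetentionPolicy{
		WhenScaled: ptr.To(monitoringv1.RetainWhenScaledRetentionType),
	}
	fmt.Println(retainOnScaleDown(false, retain)) // false: gate disabled
	fmt.Println(retainOnScaleDown(true, nil))     // false: defaults to Delete
	fmt.Println(retainOnScaleDown(true, retain))  // true: shard StatefulSets are kept
}
```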
@@ -417,6 +417,7 @@ func TestGatedFeatures(t *testing.T) {
		"PromAgentReconcileDaemonSetResourceUpdate": testPromAgentReconcileDaemonSetResourceUpdate,
		"PromAgentReconcileDaemonSetResourceDelete": testPromAgentReconcileDaemonSetResourceDelete,
		"PrometheusAgentDaemonSetSelectPodMonitor":  testPrometheusAgentDaemonSetSelectPodMonitor,
		"PrometheusRetentionPolicies":               testPrometheusRetentionPolicies,
	}

	for name, f := range testFuncs {
@@ -5351,6 +5351,65 @@ func testPrometheusServiceName(t *testing.T) {
	require.Equal(t, svcList.Items[0].Name, svc.Name)
}

// testPrometheusRetentionPolicies tests the shard retention policies for Prometheus.
// ShardRetentionPolicy requires the ShardRetention feature gate to be enabled,
// therefore, it runs in the feature-gated test suite.
func testPrometheusRetentionPolicies(t *testing.T) {
	t.Parallel()
	ctx := context.Background()
	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	ns := framework.CreateNamespace(ctx, t, testCtx)
	framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)
	_, err := framework.CreateOrUpdatePrometheusOperatorWithOpts(
		ctx, testFramework.PrometheusOperatorOpts{
			Namespace:           ns,
			AllowedNamespaces:   []string{ns},
			EnabledFeatureGates: []string{"PrometheusShardRetentionPolicy"},
		},
	)
	require.NoError(t, err)

	testCases := []struct {
		name                 string
		whenScaledDown       *monitoringv1.WhenScaledRetentionType
		expectedRemainingSts int
	}{
		{
			name:                 "delete",
			whenScaledDown:       ptr.To(monitoringv1.DeleteWhenScaledRetentionType),
			expectedRemainingSts: 1,
		},
		{
			name:                 "retain",
			whenScaledDown:       ptr.To(monitoringv1.RetainWhenScaledRetentionType),
			expectedRemainingSts: 2,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			p := framework.MakeBasicPrometheus(ns, tc.name, tc.name, 1)
			p.Spec.ShardRetentionPolicy = &monitoringv1.ShardRetentionPolicy{
				WhenScaled: tc.whenScaledDown,
			}
			p.Spec.Shards = ptr.To(int32(2))
			_, err := framework.CreatePrometheusAndWaitUntilReady(ctx, ns, p)
			require.NoError(t, err, "failed to create Prometheus")

			p, err = framework.ScalePrometheusAndWaitUntilReady(ctx, tc.name, ns, 1)
			require.NoError(t, err, "failed to scale down Prometheus")
			require.Equal(t, int32(1), p.Status.Shards, "expected scale of 1 shard")

			podList, err := framework.KubeClient.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{LabelSelector: p.Status.Selector})
			require.NoError(t, err, "failed to list statefulsets")

			require.Len(t, podList.Items, tc.expectedRemainingSts)
		})
	}
}

func isAlertmanagerDiscoveryWorking(ns, promSVCName, alertmanagerName string) func(ctx context.Context) (bool, error) {
	return func(ctx context.Context) (bool, error) {
		pods, err := framework.KubeClient.CoreV1().Pods(ns).List(ctx, alertmanager.ListOptions(alertmanagerName))