1
0
Fork 0
mirror of https://github.com/prometheus-operator/prometheus-operator.git synced 2025-04-15 16:56:24 +00:00

chore: Adding StartupProbeTimeoutSeconds to Prometheus CRD (#6137)

---------

Signed-off-by: Nicolas Takashi <nicolas.tcs@hotmail.com>
Co-authored-by: Simon Pasquier <spasquie@redhat.com>
This commit is contained in:
Nicolas Takashi 2023-12-19 13:12:42 +00:00 committed by GitHub
parent 27f76054cf
commit 01765e5a76
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 355 additions and 4 deletions

65
Documentation/api.md generated
View file

@ -2560,6 +2560,19 @@ If not specified, the configuration is reloaded using the /-/reload HTTP endpoin
</tr>
<tr>
<td>
<code>maximumStartupDurationSeconds</code><br/>
<em>
int32
</em>
</td>
<td>
<em>(Optional)</em>
<p>Defines the maximum time that the <code>prometheus</code> container&rsquo;s startup probe will wait before being considered failed. The startup probe will return success after the WAL replay is complete.
If set, the value must be at least 60 (seconds). If unset, it defaults to 900 seconds (15 minutes).</p>
</td>
</tr>
<tr>
<td>
<code>baseImage</code><br/>
<em>
string
@ -6938,6 +6951,19 @@ ReloadStrategyType
If not specified, the configuration is reloaded using the /-/reload HTTP endpoint.</p>
</td>
</tr>
<tr>
<td>
<code>maximumStartupDurationSeconds</code><br/>
<em>
int32
</em>
</td>
<td>
<em>(Optional)</em>
<p>Defines the maximum time that the <code>prometheus</code> container&rsquo;s startup probe will wait before being considered failed. The startup probe will return success after the WAL replay is complete.
If set, the value must be at least 60 (seconds). If unset, it defaults to 900 seconds (15 minutes).</p>
</td>
</tr>
</tbody>
</table>
<h3 id="monitoring.coreos.com/v1.Condition">Condition
@ -11054,6 +11080,19 @@ If not specified, the configuration is reloaded using the /-/reload HTTP endpoin
</tr>
<tr>
<td>
<code>maximumStartupDurationSeconds</code><br/>
<em>
int32
</em>
</td>
<td>
<em>(Optional)</em>
<p>Defines the maximum time that the <code>prometheus</code> container&rsquo;s startup probe will wait before being considered failed. The startup probe will return success after the WAL replay is complete.
If set, the value must be at least 60 (seconds). If unset, it defaults to 900 seconds (15 minutes).</p>
</td>
</tr>
<tr>
<td>
<code>baseImage</code><br/>
<em>
string
@ -16599,6 +16638,19 @@ ReloadStrategyType
If not specified, the configuration is reloaded using the /-/reload HTTP endpoint.</p>
</td>
</tr>
<tr>
<td>
<code>maximumStartupDurationSeconds</code><br/>
<em>
int32
</em>
</td>
<td>
<em>(Optional)</em>
<p>Defines the maximum time that the <code>prometheus</code> container&rsquo;s startup probe will wait before being considered failed. The startup probe will return success after the WAL replay is complete.
If set, the value must be at least 60 (seconds). If unset, it defaults to 900 seconds (15 minutes).</p>
</td>
</tr>
</table>
</td>
</tr>
@ -21018,6 +21070,19 @@ ReloadStrategyType
If not specified, the configuration is reloaded using the /-/reload HTTP endpoint.</p>
</td>
</tr>
<tr>
<td>
<code>maximumStartupDurationSeconds</code><br/>
<em>
int32
</em>
</td>
<td>
<em>(Optional)</em>
<p>Defines the maximum time that the <code>prometheus</code> container&rsquo;s startup probe will wait before being considered failed. The startup probe will return success after the WAL replay is complete.
If set, the value must be at least 60 (seconds). If unset, it defaults to 900 seconds (15 minutes).</p>
</td>
</tr>
</tbody>
</table>
<h3 id="monitoring.coreos.com/v1alpha1.ProxyConfig">ProxyConfig

18
bundle.yaml generated
View file

@ -18655,6 +18655,15 @@ spec:
- warn
- error
type: string
maximumStartupDurationSeconds:
description: Defines the maximum time that the `prometheus` container's
startup probe will wait before being considered failed. The startup
probe will return success after the WAL replay is complete. If set,
the value must be at least 60 (seconds). If unset, it defaults
to 900 seconds (15 minutes).
format: int32
minimum: 60
type: integer
minReadySeconds:
description: "Minimum number of seconds for which a newly created
Pod should be ready without any of its container crashing for it
@ -27609,6 +27618,15 @@ spec:
- warn
- error
type: string
maximumStartupDurationSeconds:
description: Defines the maximum time that the `prometheus` container's
startup probe will wait before being considered failed. The startup
probe will return success after the WAL replay is complete. If set,
the value must be at least 60 (seconds). If unset, it defaults
to 900 seconds (15 minutes).
format: int32
minimum: 60
type: integer
minReadySeconds:
description: "Minimum number of seconds for which a newly created
Pod should be ready without any of its container crashing for it

View file

@ -4054,6 +4054,15 @@ spec:
- warn
- error
type: string
maximumStartupDurationSeconds:
description: Defines the maximum time that the `prometheus` container's
startup probe will wait before being considered failed. The startup
probe will return success after the WAL replay is complete. If set,
the value must be at least 60 (seconds). If unset, it defaults
to 900 seconds (15 minutes).
format: int32
minimum: 60
type: integer
minReadySeconds:
description: "Minimum number of seconds for which a newly created
Pod should be ready without any of its container crashing for it

View file

@ -4485,6 +4485,15 @@ spec:
- warn
- error
type: string
maximumStartupDurationSeconds:
description: Defines the maximum time that the `prometheus` container's
startup probe will wait before being considered failed. The startup
probe will return success after the WAL replay is complete. If set,
the value must be at least 60 (seconds). If unset, it defaults
to 900 seconds (15 minutes).
format: int32
minimum: 60
type: integer
minReadySeconds:
description: "Minimum number of seconds for which a newly created
Pod should be ready without any of its container crashing for it

View file

@ -4055,6 +4055,15 @@ spec:
- warn
- error
type: string
maximumStartupDurationSeconds:
description: Defines the maximum time that the `prometheus` container's
startup probe will wait before being considered failed. The startup
probe will return success after the WAL replay is complete. If set,
the value must be at least 60 (seconds). If unset, it defaults
to 900 seconds (15 minutes).
format: int32
minimum: 60
type: integer
minReadySeconds:
description: "Minimum number of seconds for which a newly created
Pod should be ready without any of its container crashing for it

View file

@ -4486,6 +4486,15 @@ spec:
- warn
- error
type: string
maximumStartupDurationSeconds:
description: Defines the maximum time that the `prometheus` container's
startup probe will wait before being considered failed. The startup
probe will return success after the WAL replay is complete. If set,
the value must be at least 60 (seconds). If unset, it defaults
to 900 seconds (15 minutes).
format: int32
minimum: 60
type: integer
minReadySeconds:
description: "Minimum number of seconds for which a newly created
Pod should be ready without any of its container crashing for it

View file

@ -3632,6 +3632,12 @@
],
"type": "string"
},
"maximumStartupDurationSeconds": {
"description": "Defines the maximum time that the `prometheus` container's startup probe will wait before being considered failed. The startup probe will return success after the WAL replay is complete. If set, the value should be greater than 60 (seconds). Otherwise it will be equal to 600 seconds (15 minutes).",
"format": "int32",
"minimum": 60,
"type": "integer"
},
"minReadySeconds": {
"description": "Minimum number of seconds for which a newly created Pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) \n This is an alpha field from kubernetes 1.22 until 1.24 which requires enabling the StatefulSetMinReadySeconds feature gate.",
"format": "int32",

View file

@ -4070,6 +4070,12 @@
],
"type": "string"
},
"maximumStartupDurationSeconds": {
"description": "Defines the maximum time that the `prometheus` container's startup probe will wait before being considered failed. The startup probe will return success after the WAL replay is complete. If set, the value should be greater than 60 (seconds). Otherwise it will be equal to 600 seconds (15 minutes).",
"format": "int32",
"minimum": 60,
"type": "integer"
},
"minReadySeconds": {
"description": "Minimum number of seconds for which a newly created Pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) \n This is an alpha field from kubernetes 1.22 until 1.24 which requires enabling the StatefulSetMinReadySeconds feature gate.",
"format": "int32",

View file

@ -643,6 +643,12 @@ type CommonPrometheusFields struct {
// If not specified, the configuration is reloaded using the /-/reload HTTP endpoint.
// +optional
ReloadStrategy *ReloadStrategyType `json:"reloadStrategy,omitempty"`
// Defines the maximum time that the `prometheus` container's startup probe will wait before being considered failed. The startup probe will return success after the WAL replay is complete.
// If set, the value must be at least 60 (seconds). If unset, it defaults to 900 seconds (15 minutes).
// +optional
// +kubebuilder:validation:Minimum=60
MaximumStartupDurationSeconds *int32 `json:"maximumStartupDurationSeconds,omitempty"`
}
// +kubebuilder:validation:Enum=HTTP;ProcessSignal

View file

@ -898,6 +898,11 @@ func (in *CommonPrometheusFields) DeepCopyInto(out *CommonPrometheusFields) {
*out = new(ReloadStrategyType)
**out = **in
}
if in.MaximumStartupDurationSeconds != nil {
in, out := &in.MaximumStartupDurationSeconds, &out.MaximumStartupDurationSeconds
*out = new(int32)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CommonPrometheusFields.

View file

@ -103,6 +103,7 @@ type CommonPrometheusFieldsApplyConfiguration struct {
LabelValueLengthLimit *uint64 `json:"labelValueLengthLimit,omitempty"`
KeepDroppedTargets *uint64 `json:"keepDroppedTargets,omitempty"`
ReloadStrategy *monitoringv1.ReloadStrategyType `json:"reloadStrategy,omitempty"`
MaximumStartupDurationSeconds *int32 `json:"maximumStartupDurationSeconds,omitempty"`
}
// CommonPrometheusFieldsApplyConfiguration constructs an declarative configuration of the CommonPrometheusFields type for use with
@ -783,3 +784,11 @@ func (b *CommonPrometheusFieldsApplyConfiguration) WithReloadStrategy(value moni
b.ReloadStrategy = &value
return b
}
// WithMaximumStartupDurationSeconds stores a pointer to the given value in the
// MaximumStartupDurationSeconds field of the declarative configuration.
// The receiver is returned so that "With" calls can be chained; when the
// method is called several times, the value of the last call is kept.
func (b *CommonPrometheusFieldsApplyConfiguration) WithMaximumStartupDurationSeconds(value int32) *CommonPrometheusFieldsApplyConfiguration {
	seconds := value
	b.MaximumStartupDurationSeconds = &seconds
	return b
}

View file

@ -730,6 +730,14 @@ func (b *PrometheusSpecApplyConfiguration) WithReloadStrategy(value monitoringv1
return b
}
// WithMaximumStartupDurationSeconds stores a pointer to the given value in the
// MaximumStartupDurationSeconds field of the declarative configuration.
// The receiver is returned so that "With" calls can be chained; when the
// method is called several times, the value of the last call is kept.
func (b *PrometheusSpecApplyConfiguration) WithMaximumStartupDurationSeconds(value int32) *PrometheusSpecApplyConfiguration {
	seconds := value
	b.MaximumStartupDurationSeconds = &seconds
	return b
}
// WithBaseImage sets the BaseImage field in the declarative configuration to the given value
// and returns the receiver, so that objects can be built by chaining "With" function invocations.
// If called multiple times, the BaseImage field is set to the value of the last call.

View file

@ -708,3 +708,11 @@ func (b *PrometheusAgentSpecApplyConfiguration) WithReloadStrategy(value monitor
b.ReloadStrategy = &value
return b
}
// WithMaximumStartupDurationSeconds stores a pointer to the given value in the
// MaximumStartupDurationSeconds field of the declarative configuration.
// The receiver is returned so that "With" calls can be chained; when the
// method is called several times, the value of the last call is kept.
func (b *PrometheusAgentSpecApplyConfiguration) WithMaximumStartupDurationSeconds(value int32) *PrometheusAgentSpecApplyConfiguration {
	seconds := value
	b.MaximumStartupDurationSeconds = &seconds
	return b
}

View file

@ -242,11 +242,12 @@ func makeStatefulSetSpec(
// We don't want to use the /-/healthy handler here because it returns OK as
// soon as the web server is started (irrespective of the WAL replay).
readyProbeHandler := prompkg.ProbeHandler("/-/ready", cpf, webConfigGenerator)
startupPeriodSeconds, startupFailureThreshold := prompkg.GetStatupProbePeriodSecondsAndFailureThreshold(cpf)
startupProbe := &v1.Probe{
ProbeHandler: readyProbeHandler,
TimeoutSeconds: prompkg.ProbeTimeoutSeconds,
PeriodSeconds: 15,
FailureThreshold: 60,
PeriodSeconds: startupPeriodSeconds,
FailureThreshold: startupFailureThreshold,
}
readinessProbe := &v1.Probe{

View file

@ -189,6 +189,40 @@ func TestWALCompression(t *testing.T) {
}
}
// TestStartupProbeTimeoutSeconds checks that the startup probe of the first
// container in the StatefulSet built for a PrometheusAgent uses the expected
// period and failure threshold for a given MaximumStartupDurationSeconds.
func TestStartupProbeTimeoutSeconds(t *testing.T) {
	type testCase struct {
		maximumStartupDurationSeconds   *int32
		expectedStartupPeriodSeconds    int32
		expectedStartupFailureThreshold int32
	}

	testCases := []testCase{
		// Field left unset.
		{
			maximumStartupDurationSeconds:   nil,
			expectedStartupPeriodSeconds:    15,
			expectedStartupFailureThreshold: 60,
		},
		// Field explicitly set to 600 seconds.
		{
			maximumStartupDurationSeconds:   ptr.To(int32(600)),
			expectedStartupPeriodSeconds:    60,
			expectedStartupFailureThreshold: 10,
		},
	}

	for _, tc := range testCases {
		cpf := monitoringv1.CommonPrometheusFields{
			MaximumStartupDurationSeconds: tc.maximumStartupDurationSeconds,
		}

		sset, err := makeStatefulSetFromPrometheus(monitoringv1alpha1.PrometheusAgent{
			Spec: monitoringv1alpha1.PrometheusAgentSpec{
				CommonPrometheusFields: cpf,
			},
		})
		require.NoError(t, err)

		probe := sset.Spec.Template.Spec.Containers[0].StartupProbe
		require.NotNil(t, probe)
		require.Equal(t, tc.expectedStartupPeriodSeconds, probe.PeriodSeconds)
		require.Equal(t, tc.expectedStartupFailureThreshold, probe.FailureThreshold)
	}
}
// newLogger returns a logfmt logger that writes to os.Stdout and is wrapped
// in a level filter configured with level.AllowWarn().
func newLogger() log.Logger {
	stdout := log.NewLogfmtLogger(os.Stdout)
	return level.NewFilter(stdout, level.AllowWarn())
}

View file

@ -317,11 +317,12 @@ func makeStatefulSetSpec(
// We don't want to use the /-/healthy handler here because it returns OK as
// soon as the web server is started (irrespective of the WAL replay).
readyProbeHandler := prompkg.ProbeHandler("/-/ready", cpf, webConfigGenerator)
startupPeriodSeconds, startupFailureThreshold := prompkg.GetStatupProbePeriodSecondsAndFailureThreshold(cpf)
startupProbe := &v1.Probe{
ProbeHandler: readyProbeHandler,
TimeoutSeconds: prompkg.ProbeTimeoutSeconds,
PeriodSeconds: 15,
FailureThreshold: 60,
PeriodSeconds: startupPeriodSeconds,
FailureThreshold: startupFailureThreshold,
}
readinessProbe := &v1.Probe{

View file

@ -3074,3 +3074,38 @@ func TestPodTopologySpreadConstraintWithAdditionalLabels(t *testing.T) {
})
}
}
// TestStartupProbeTimeoutSeconds checks that the startup probe of the first
// container in the StatefulSet built for a Prometheus uses the expected
// period and failure threshold for a given MaximumStartupDurationSeconds.
func TestStartupProbeTimeoutSeconds(t *testing.T) {
	type testCase struct {
		maximumStartupDurationSeconds   *int32
		expectedStartupPeriodSeconds    int32
		expectedStartupFailureThreshold int32
	}

	testCases := []testCase{
		// Field left unset.
		{
			maximumStartupDurationSeconds:   nil,
			expectedStartupPeriodSeconds:    15,
			expectedStartupFailureThreshold: 60,
		},
		// Field explicitly set to 600 seconds.
		{
			maximumStartupDurationSeconds:   ptr.To(int32(600)),
			expectedStartupPeriodSeconds:    60,
			expectedStartupFailureThreshold: 10,
		},
	}

	for _, tc := range testCases {
		cpf := monitoringv1.CommonPrometheusFields{
			MaximumStartupDurationSeconds: tc.maximumStartupDurationSeconds,
		}

		sset, err := makeStatefulSetFromPrometheus(monitoringv1.Prometheus{
			ObjectMeta: metav1.ObjectMeta{},
			Spec: monitoringv1.PrometheusSpec{
				CommonPrometheusFields: cpf,
			},
		})
		require.NoError(t, err)

		probe := sset.Spec.Template.Spec.Containers[0].StartupProbe
		require.NotNil(t, probe)
		require.Equal(t, tc.expectedStartupPeriodSeconds, probe.PeriodSeconds)
		require.Equal(t, tc.expectedStartupFailureThreshold, probe.FailureThreshold)
	}
}

View file

@ -17,6 +17,7 @@ package prometheus
import (
"bytes"
"fmt"
"math"
"net/url"
"path"
"path/filepath"
@ -526,3 +527,17 @@ func MakeK8sTopologySpreadConstraint(selectorLabels map[string]string, tscs []mo
return coreTscs
}
// GetStatupProbePeriodSecondsAndFailureThreshold returns the period (in
// seconds) and the failure threshold to use for the startup probe, in that
// order.
//
// When MaximumStartupDurationSeconds is nil or lower than 60, the result is a
// 15 second period with a failure threshold of 60. Otherwise the requested
// duration is divided into the smallest number of attempts whose period does
// not exceed 60 seconds; both values are rounded up, so period*threshold can
// be slightly larger than the requested duration but never smaller.
func GetStatupProbePeriodSecondsAndFailureThreshold(cfp monitoringv1.CommonPrometheusFields) (int32, int32) {
	const (
		defaultPeriodSeconds    int32 = 15
		defaultFailureThreshold int32 = 60

		// Longest period between two probes; also the smallest duration
		// that is taken into account.
		maxPeriodSeconds float64 = 60
	)

	requested := float64(ptr.Deref(cfp.MaximumStartupDurationSeconds, 0))
	if requested < maxPeriodSeconds {
		return defaultPeriodSeconds, defaultFailureThreshold
	}

	attempts := math.Ceil(requested / maxPeriodSeconds)
	period := math.Ceil(requested / attempts)

	return int32(period), int32(attempts)
}

View file

@ -0,0 +1,98 @@
// Copyright 2023 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package prometheus
import (
"testing"
"github.com/stretchr/testify/require"
"k8s.io/utils/ptr"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
)
// TestStartupProbeTimeoutSeconds verifies how
// GetStatupProbePeriodSecondsAndFailureThreshold turns
// MaximumStartupDurationSeconds into a startup probe period and failure
// threshold, and that the effective startup budget (period * threshold)
// computed from the returned values matches the expectation.
func TestStartupProbeTimeoutSeconds(t *testing.T) {
	tests := []struct {
		name                            string
		maximumStartupDurationSeconds   *int32
		expectedStartupPeriodSeconds    int32
		expectedStartupFailureThreshold int32
		expectedMaxStartupDuration      int32
	}{
		{
			name:                            "unset",
			maximumStartupDurationSeconds:   nil,
			expectedStartupPeriodSeconds:    15,
			expectedStartupFailureThreshold: 60,
			expectedMaxStartupDuration:      900,
		},
		{
			name:                            "zero",
			maximumStartupDurationSeconds:   ptr.To(int32(0)),
			expectedStartupPeriodSeconds:    15,
			expectedStartupFailureThreshold: 60,
			expectedMaxStartupDuration:      900,
		},
		{
			name:                            "below minimum",
			maximumStartupDurationSeconds:   ptr.To(int32(1)),
			expectedStartupPeriodSeconds:    15,
			expectedStartupFailureThreshold: 60,
			expectedMaxStartupDuration:      900,
		},
		{
			name:                            "minimum",
			maximumStartupDurationSeconds:   ptr.To(int32(60)),
			expectedStartupPeriodSeconds:    60,
			expectedStartupFailureThreshold: 1,
			expectedMaxStartupDuration:      60,
		},
		{
			name:                            "600 seconds",
			maximumStartupDurationSeconds:   ptr.To(int32(600)),
			expectedStartupPeriodSeconds:    60,
			expectedStartupFailureThreshold: 10,
			expectedMaxStartupDuration:      600,
		},
		{
			name:                            "900 seconds",
			maximumStartupDurationSeconds:   ptr.To(int32(900)),
			expectedStartupPeriodSeconds:    60,
			expectedStartupFailureThreshold: 15,
			expectedMaxStartupDuration:      900,
		},
		{
			name:                            "1200 seconds",
			maximumStartupDurationSeconds:   ptr.To(int32(1200)),
			expectedStartupPeriodSeconds:    60,
			expectedStartupFailureThreshold: 20,
			expectedMaxStartupDuration:      1200,
		},
		{
			name:                            "not a multiple of 60, exact split",
			maximumStartupDurationSeconds:   ptr.To(int32(129)),
			expectedStartupPeriodSeconds:    43,
			expectedStartupFailureThreshold: 3,
			expectedMaxStartupDuration:      129,
		},
		{
			name:                            "not a multiple of 60, rounded up",
			maximumStartupDurationSeconds:   ptr.To(int32(322)),
			expectedStartupPeriodSeconds:    54,
			expectedStartupFailureThreshold: 6,
			expectedMaxStartupDuration:      324,
		},
	}

	for _, test := range tests {
		// The subtest runs synchronously, so capturing the loop variable is safe.
		t.Run(test.name, func(t *testing.T) {
			startupPeriodSeconds, startupFailureThreshold := GetStatupProbePeriodSecondsAndFailureThreshold(monitoringv1.CommonPrometheusFields{
				MaximumStartupDurationSeconds: test.maximumStartupDurationSeconds,
			})

			require.Equal(t, test.expectedStartupPeriodSeconds, startupPeriodSeconds)
			require.Equal(t, test.expectedStartupFailureThreshold, startupFailureThreshold)

			// Derive the budget from the values actually returned, not from
			// the table, so that this assertion exercises the function.
			effectiveMaxStartupDuration := startupPeriodSeconds * startupFailureThreshold
			require.Equal(t, test.expectedMaxStartupDuration, effectiveMaxStartupDuration)

			// Rounding must never shrink the budget below what was requested.
			if test.maximumStartupDurationSeconds != nil {
				require.GreaterOrEqual(t, effectiveMaxStartupDuration, *test.maximumStartupDurationSeconds)
			}
		})
	}
}