mirror of
https://github.com/prometheus-operator/prometheus-operator.git
synced 2025-04-21 11:48:53 +00:00
pkg/*/statefulset.go: Do not mutate shared object
Users have reported high CPU usage of the Prometheus Operator when adding an annotation to a Prometheus object. The Operator would update the respective StatefulSet in an infinite loop. Whether a given StatefulSet needs updating is determined by the hash of the inputs needed to generate the StatefulSet, which is calculated and then attached to the StatefulSet as an annotation. On subsequent reconciliations this hash is compared to the hash of the new inputs. The function to build the StatefulSet definition is passed the Prometheus object. This is done by value, not by reference. This does not enforce a deep copy but merely a shallow copy. In the build function the new StatefulSet would inherit the annotation map of the Prometheus object. Next the input hash would be added to this map, resulting in both the Statefulset having the hash annotation, as intended, as well as the Prometheus object (same map, shared as a reference). On subsequent reconciliations the same Prometheus object is used to calculate the input hash, this time accidentally containing the has annotation from the previous run. Even though the actual inputs never changed, this results in a new hash, thereby updating the StatefulSet, ... The solution is to deep copy the Prometheus object before using it in the StatefulSet build function, thereby never mutating the annotations of the Prometheus object. Same measure is taken for the Alertmanager StatefulSet build function.
This commit is contained in:
parent
416ed2b8d1
commit
13f97851cd
5 changed files with 40 additions and 7 deletions
pkg
test
|
@ -170,6 +170,10 @@ func makeStatefulSetService(p *monitoringv1.Alertmanager, config Config) *v1.Ser
|
|||
}
|
||||
|
||||
func makeStatefulSetSpec(a *monitoringv1.Alertmanager, config Config) (*appsv1.StatefulSetSpec, error) {
|
||||
// Before editing 'a' create deep copy, to prevent side effects. For more
|
||||
// details see https://github.com/coreos/prometheus-operator/issues/1659
|
||||
a = a.DeepCopy()
|
||||
|
||||
tag := a.Spec.Version
|
||||
if a.Spec.Tag != "" {
|
||||
tag = a.Spec.Tag
|
||||
|
|
|
@ -84,6 +84,12 @@ func makeStatefulSet(
|
|||
ruleConfigMapNames []string,
|
||||
inputHash string,
|
||||
) (*appsv1.StatefulSet, error) {
|
||||
// p is passed in by value, not by reference. But p contains references like
|
||||
// to annotation map, that do not get copied on function invocation. Ensure to
|
||||
// prevent side effects before editing p by creating a deep copy. For more
|
||||
// details see https://github.com/coreos/prometheus-operator/issues/1659.
|
||||
p = *p.DeepCopy()
|
||||
|
||||
// TODO(fabxc): is this the right point to inject defaults?
|
||||
// Ideally we would do it before storing but that's currently not possible.
|
||||
// Potentially an update handler on first insertion.
|
||||
|
|
|
@ -54,8 +54,21 @@ func TestStatefulSetLabelingAndAnnotations(t *testing.T) {
|
|||
|
||||
require.NoError(t, err)
|
||||
|
||||
if !reflect.DeepEqual(labels, sset.Labels) || !reflect.DeepEqual(annotations, sset.Annotations) {
|
||||
t.Fatal("Labels or Annotations are not properly being propagated to the StatefulSet")
|
||||
if !reflect.DeepEqual(labels, sset.Labels) {
|
||||
fmt.Println(pretty.Compare(labels, sset.Labels))
|
||||
t.Fatal("Labels are not properly being propagated to the StatefulSet")
|
||||
}
|
||||
|
||||
expectedAnnotations := map[string]string{
|
||||
"prometheus-operator-input-hash": "",
|
||||
}
|
||||
for k, v := range annotations {
|
||||
expectedAnnotations[k] = v
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(expectedAnnotations, sset.Annotations) {
|
||||
fmt.Println(pretty.Compare(expectedAnnotations, sset.Annotations))
|
||||
t.Fatal("Annotations are not properly being propagated to the StatefulSet")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -693,6 +693,12 @@ func TestPrometheusOnlyUpdatedOnRelevantChanges(t *testing.T) {
|
|||
name := "test"
|
||||
prometheus := framework.MakeBasicPrometheus(ns, name, name, 1)
|
||||
|
||||
// Adding an annotation to Prometheus lead to high CPU usage in the past
|
||||
// updating the Prometheus StatefulSet in a loop (See
|
||||
// https://github.com/coreos/prometheus-operator/issues/1659). Added here to
|
||||
// prevent a regression.
|
||||
prometheus.Annotations["test-annotation"] = "test-value"
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
type versionedResource interface {
|
||||
|
|
|
@ -35,8 +35,9 @@ import (
|
|||
func (f *Framework) MakeBasicPrometheus(ns, name, group string, replicas int32) *monitoringv1.Prometheus {
|
||||
return &monitoringv1.Prometheus{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: ns,
|
||||
Name: name,
|
||||
Namespace: ns,
|
||||
Annotations: map[string]string{},
|
||||
},
|
||||
Spec: monitoringv1.PrometheusSpec{
|
||||
Replicas: &replicas,
|
||||
|
@ -235,11 +236,11 @@ func (f *Framework) WaitForPrometheusReady(p *monitoringv1.Prometheus, timeout t
|
|||
func (f *Framework) DeletePrometheusAndWaitUntilGone(ns, name string) error {
|
||||
_, err := f.MonClientV1.Prometheuses(ns).Get(name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return errors.Wrap(err, fmt.Sprintf("requesting Prometheus tpr %v failed", name))
|
||||
return errors.Wrap(err, fmt.Sprintf("requesting Prometheus custom resource %v failed", name))
|
||||
}
|
||||
|
||||
if err := f.MonClientV1.Prometheuses(ns).Delete(name, nil); err != nil {
|
||||
return errors.Wrap(err, fmt.Sprintf("deleting Prometheus tpr %v failed", name))
|
||||
return errors.Wrap(err, fmt.Sprintf("deleting Prometheus custom resource %v failed", name))
|
||||
}
|
||||
|
||||
if err := WaitForPodsReady(
|
||||
|
@ -249,7 +250,10 @@ func (f *Framework) DeletePrometheusAndWaitUntilGone(ns, name string) error {
|
|||
0,
|
||||
prometheus.ListOptions(name),
|
||||
); err != nil {
|
||||
return errors.Wrap(err, fmt.Sprintf("waiting for Prometheus tpr (%s) to vanish timed out", name))
|
||||
return errors.Wrap(
|
||||
err,
|
||||
fmt.Sprintf("waiting for Prometheus custom resource (%s) to vanish timed out", name),
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue