1
0
Fork 0
mirror of https://github.com/prometheus-operator/prometheus-operator.git synced 2025-04-21 11:48:53 +00:00

pkg/*/statefulset.go: Do not mutate shared object

Users have reported high CPU usage of the Prometheus Operator when
adding an annotation to a Prometheus object. The Operator would update
the respective StatefulSet in an infinite loop.

Whether a given StatefulSet needs updating is determined by the hash of
the inputs needed to generate the StatefulSet, which is calculated and
then attached to the StatefulSet as an annotation. On subsequent
reconciliations this hash is compared to the hash of the new inputs.

The function to build the StatefulSet definition is passed the
Prometheus object. This is done by value, not by reference. This does
not enforce a deep copy but merely a shallow copy. In the build function
the new StatefulSet would inherit the annotation map of the Prometheus
object. Next the input hash would be added to this map, resulting in
both the Statefulset having the hash annotation, as intended, as well as
the Prometheus object (same map, shared as a reference).

On subsequent reconciliations the same Prometheus object is used to
calculate the input hash, this time accidentally containing the has
annotation from the previous run. Even though the actual inputs never
changed, this results in a new hash, thereby updating the StatefulSet,
...

The solution is to deep copy the Prometheus object before using it in
the StatefulSet build function, thereby never mutating the annotations
of the Prometheus object. Same measure is taken for the Alertmanager
StatefulSet build function.
This commit is contained in:
Max Leonard Inden 2018-08-13 13:42:41 +02:00
parent 416ed2b8d1
commit 13f97851cd
No known key found for this signature in database
GPG key ID: 5403C5464810BC26
5 changed files with 40 additions and 7 deletions

View file

@ -170,6 +170,10 @@ func makeStatefulSetService(p *monitoringv1.Alertmanager, config Config) *v1.Ser
}
func makeStatefulSetSpec(a *monitoringv1.Alertmanager, config Config) (*appsv1.StatefulSetSpec, error) {
// Before editing 'a' create deep copy, to prevent side effects. For more
// details see https://github.com/coreos/prometheus-operator/issues/1659
a = a.DeepCopy()
tag := a.Spec.Version
if a.Spec.Tag != "" {
tag = a.Spec.Tag

View file

@ -84,6 +84,12 @@ func makeStatefulSet(
ruleConfigMapNames []string,
inputHash string,
) (*appsv1.StatefulSet, error) {
// p is passed in by value, not by reference. But p contains references like
// to annotation map, that do not get copied on function invocation. Ensure to
// prevent side effects before editing p by creating a deep copy. For more
// details see https://github.com/coreos/prometheus-operator/issues/1659.
p = *p.DeepCopy()
// TODO(fabxc): is this the right point to inject defaults?
// Ideally we would do it before storing but that's currently not possible.
// Potentially an update handler on first insertion.

View file

@ -54,8 +54,21 @@ func TestStatefulSetLabelingAndAnnotations(t *testing.T) {
require.NoError(t, err)
if !reflect.DeepEqual(labels, sset.Labels) || !reflect.DeepEqual(annotations, sset.Annotations) {
t.Fatal("Labels or Annotations are not properly being propagated to the StatefulSet")
if !reflect.DeepEqual(labels, sset.Labels) {
fmt.Println(pretty.Compare(labels, sset.Labels))
t.Fatal("Labels are not properly being propagated to the StatefulSet")
}
expectedAnnotations := map[string]string{
"prometheus-operator-input-hash": "",
}
for k, v := range annotations {
expectedAnnotations[k] = v
}
if !reflect.DeepEqual(expectedAnnotations, sset.Annotations) {
fmt.Println(pretty.Compare(expectedAnnotations, sset.Annotations))
t.Fatal("Annotations are not properly being propagated to the StatefulSet")
}
}

View file

@ -693,6 +693,12 @@ func TestPrometheusOnlyUpdatedOnRelevantChanges(t *testing.T) {
name := "test"
prometheus := framework.MakeBasicPrometheus(ns, name, name, 1)
// Adding an annotation to Prometheus lead to high CPU usage in the past
// updating the Prometheus StatefulSet in a loop (See
// https://github.com/coreos/prometheus-operator/issues/1659). Added here to
// prevent a regression.
prometheus.Annotations["test-annotation"] = "test-value"
ctx, cancel := context.WithCancel(context.Background())
type versionedResource interface {

View file

@ -35,8 +35,9 @@ import (
func (f *Framework) MakeBasicPrometheus(ns, name, group string, replicas int32) *monitoringv1.Prometheus {
return &monitoringv1.Prometheus{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: ns,
Name: name,
Namespace: ns,
Annotations: map[string]string{},
},
Spec: monitoringv1.PrometheusSpec{
Replicas: &replicas,
@ -235,11 +236,11 @@ func (f *Framework) WaitForPrometheusReady(p *monitoringv1.Prometheus, timeout t
func (f *Framework) DeletePrometheusAndWaitUntilGone(ns, name string) error {
_, err := f.MonClientV1.Prometheuses(ns).Get(name, metav1.GetOptions{})
if err != nil {
return errors.Wrap(err, fmt.Sprintf("requesting Prometheus tpr %v failed", name))
return errors.Wrap(err, fmt.Sprintf("requesting Prometheus custom resource %v failed", name))
}
if err := f.MonClientV1.Prometheuses(ns).Delete(name, nil); err != nil {
return errors.Wrap(err, fmt.Sprintf("deleting Prometheus tpr %v failed", name))
return errors.Wrap(err, fmt.Sprintf("deleting Prometheus custom resource %v failed", name))
}
if err := WaitForPodsReady(
@ -249,7 +250,10 @@ func (f *Framework) DeletePrometheusAndWaitUntilGone(ns, name string) error {
0,
prometheus.ListOptions(name),
); err != nil {
return errors.Wrap(err, fmt.Sprintf("waiting for Prometheus tpr (%s) to vanish timed out", name))
return errors.Wrap(
err,
fmt.Sprintf("waiting for Prometheus custom resource (%s) to vanish timed out", name),
)
}
return nil