
Merge remote-tracking branch 'upstream/master' into crd

Goutham Veeramachaneni 2017-07-21 16:37:55 +05:30
commit c3f957d2ab
No known key found for this signature in database
GPG key ID: F1C217E8E9023CAD
80 changed files with 1281 additions and 476 deletions
CHANGELOG.md
Documentation
Jenkinsfile
Makefile
VERSION
bundle.yaml
cmd
apidocgen
operator
contrib
example
non-rbac
rbac/prometheus-operator
helm
pkg
client/monitoring/v1alpha1
k8sutil
prometheus
scripts/jenkins
test/e2e
vendor


@ -1,3 +1,15 @@
## 0.11.0 / 2017-07-20
Warning: This release deprecates the previously used storage definition in favor of upstream PersistentVolumeClaim templates. While this should not have an immediate effect on a running cluster, Prometheus object definitions that have storage configured need to be adapted. The previously existing fields are still there, but have no effect anymore.
* [FEATURE] Add Prometheus 2.0 alpha3 support.
* [FEATURE] Use PVC templates instead of custom storage definition.
* [FEATURE] Add cAdvisor port to kubelet sync.
* [FEATURE] Allow default base images to be configurable.
* [FEATURE] Configure Prometheus to only use necessary namespaces.
* [ENHANCEMENT] Improve rollout detection for Alertmanager clusters.
* [BUGFIX] Fix targetPort relabeling.
## 0.10.2 / 2017-06-21
* [BUGFIX] Use computed route prefix instead of directly from manifest.
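
To make the storage deprecation warning above concrete, here is a minimal sketch of a Prometheus object using the new PVC-template form. The resource name, storage class, and size are illustrative assumptions, not values taken from this commit; per the operator code in this change, the access mode is forced to ReadWriteOnce.

apiVersion: monitoring.coreos.com/v1alpha1
kind: Prometheus
metadata:
  name: example                     # hypothetical name
spec:
  replicas: 2
  storage:
    # The full PersistentVolumeClaim template replaces the deprecated
    # class/selector/resources fields, which remain but have no effect.
    volumeClaimTemplate:
      spec:
        storageClassName: standard  # assumed storage class
        resources:
          requests:
            storage: 10Gi           # assumed size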


@ -6,7 +6,7 @@ This Document documents the types introduced by the Prometheus Operator to be co
## AlertingSpec
AlertingSpec defines paramters for alerting configuration of Prometheus servers.
AlertingSpec defines parameters for alerting configuration of Prometheus servers.
| Field | Description | Scheme | Required |
| ----- | ----------- | ------ | -------- |
@ -199,7 +199,7 @@ StorageSpec defines the configured storage for a group Prometheus servers.
| class | Name of the StorageClass to use when requesting storage provisioning. More info: https://kubernetes.io/docs/user-guide/persistent-volumes/#storageclasses DEPRECATED | string | true |
| selector | A label query over volumes to consider for binding. DEPRECATED | *[metav1.LabelSelector](https://kubernetes.io/docs/api-reference/v1.6/#labelselector-v1-meta) | true |
| resources | Resources represents the minimum resources the volume should have. More info: http://kubernetes.io/docs/user-guide/persistent-volumes#resources DEPRECATED | [v1.ResourceRequirements](https://kubernetes.io/docs/api-reference/v1.6/#resourcerequirements-v1-core) | true |
| volumeClaimTemplate | Pvc A pvc spec to be used by the Prometheus statefulsets. | v1.PersistentVolumeClaim | false |
| volumeClaimTemplate | A PVC spec to be used by the Prometheus StatefulSets. | [v1.PersistentVolumeClaim](https://kubernetes.io/docs/api-reference/v1.6/#persistentvolumeclaim-v1-core) | false |
## TLSConfig


@ -43,7 +43,7 @@ spec:
serviceAccountName: prometheus-operator
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
args:
- "--kubelet-service=kube-system/kubelet"
- "--config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1"
@ -326,6 +326,8 @@ spec:
endpoints:
- port: http-metrics
interval: 30s
- port: cadvisor
interval: 30s
honorLabels: true
selector:
matchLabels:


@ -85,7 +85,7 @@ spec:
serviceAccountName: prometheus-operator
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
resources:
requests:
cpu: 100m

Jenkinsfile (vendored): 12 changed lines

@ -66,6 +66,12 @@ job('po-tests-pr') {
onlyIfBuildSucceeds(false)
onlyIfBuildFails(false)
}
postBuildScripts {
archiveArtifacts('build/**/*')
onlyIfBuildSucceeds(false)
onlyIfBuildFails(false)
}
wsCleanup()
}
}
@ -125,6 +131,11 @@ job('po-tests-master') {
onlyIfBuildSucceeds(false)
onlyIfBuildFails(false)
}
postBuildScripts {
archiveArtifacts('build/**/*')
onlyIfBuildSucceeds(false)
onlyIfBuildFails(false)
}
slackNotifier {
room('#team-monitoring')
teamDomain('coreos')
@ -133,5 +144,6 @@ job('po-tests-master') {
notifyRegression(true)
notifyRepeatedFailure(true)
}
wsCleanup()
}
}


@ -1,12 +1,11 @@
REPO?=quay.io/coreos/prometheus-operator
TAG?=$(shell git rev-parse --short HEAD)
NAMESPACE?=po-e2e-$(shell LC_CTYPE=C tr -dc a-z0-9 < /dev/urandom | head -c 13 ; echo '')
KUBECONFIG?=$(HOME)/.kube/config
PROMU := $(GOPATH)/bin/promu
PREFIX ?= $(shell pwd)
CLUSTER_IP?=$(shell kubectl config view --minify | grep server: | cut -f 3 -d ":" | tr -d "//")
pkgs = $(shell go list ./... | grep -v /vendor/ | grep -v /test/)
all: check-license format build test
@ -31,7 +30,7 @@ container:
e2e-test:
go test -timeout 20m -v ./test/migration/ $(TEST_RUN_ARGS) --kubeconfig "$(HOME)/.kube/config" --operator-image=$(REPO):$(TAG) --namespace=$(NAMESPACE) --cluster-ip=$(CLUSTER_IP)
go test -timeout 20m -v ./test/e2e/ $(TEST_RUN_ARGS) --kubeconfig "$(HOME)/.kube/config" --operator-image=$(REPO):$(TAG) --namespace=$(NAMESPACE) --cluster-ip=$(CLUSTER_IP)
go test -timeout 20m -v ./test/e2e/ $(TEST_RUN_ARGS) --kubeconfig=$(KUBECONFIG) --operator-image=$(REPO):$(TAG) --namespace=$(NAMESPACE)
e2e-status:
kubectl get prometheus,alertmanager,servicemonitor,statefulsets,deploy,svc,endpoints,pods,cm,secrets,replicationcontrollers --all-namespaces
@ -54,8 +53,8 @@ apidocgen:
@go install github.com/coreos/prometheus-operator/cmd/apidocgen
docs: embedmd apidocgen
embedmd -w `find Documentation -name "*.md"`
apidocgen pkg/client/monitoring/v1alpha1/types.go > Documentation/api.md
$(GOPATH)/bin/embedmd -w `find Documentation -name "*.md"`
$(GOPATH)/bin/apidocgen pkg/client/monitoring/v1alpha1/types.go > Documentation/api.md
generate:
hack/generate.sh


@ -1,2 +1,2 @@
0.10.1
0.11.0


@ -75,7 +75,7 @@ spec:
serviceAccountName: prometheus-operator
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
resources:
requests:
cpu: 100m


@ -42,6 +42,7 @@ var (
"v1.ResourceRequirements": "https://kubernetes.io/docs/api-reference/v1.6/#resourcerequirements-v1-core",
"v1.LocalObjectReference": "https://kubernetes.io/docs/api-reference/v1.6/#localobjectreference-v1-core",
"v1.SecretKeySelector": "https://kubernetes.io/docs/api-reference/v1.6/#secretkeyselector-v1-core",
"v1.PersistentVolumeClaim": "https://kubernetes.io/docs/api-reference/v1.6/#persistentvolumeclaim-v1-core",
}
selfLinks = map[string]string{}


@ -52,7 +52,7 @@ func init() {
flagset.StringVar(&cfg.KubeletObject, "kubelet-service", "", "Service/Endpoints object to write kubelets into in format \"namespace/name\"")
flagset.BoolVar(&cfg.TLSInsecure, "tls-insecure", false, "- NOT RECOMMENDED FOR PRODUCTION - Don't verify API server's CA certificate.")
flagset.BoolVar(&analyticsEnabled, "analytics", true, "Send analytical event (Cluster Created/Deleted etc.) to Google Analytics")
flagset.StringVar(&cfg.PrometheusConfigReloader, "prometheus-config-reloader", "quay.io/coreos/prometheus-config-reloader:v0.0.1", "Config and rule reload image")
flagset.StringVar(&cfg.PrometheusConfigReloader, "prometheus-config-reloader", "quay.io/coreos/prometheus-config-reloader:v0.0.2", "Config and rule reload image")
flagset.StringVar(&cfg.ConfigReloaderImage, "config-reloader-image", "quay.io/coreos/configmap-reload:v0.0.1", "Reload Image")
flagset.StringVar(&cfg.AlertmanagerDefaultBaseImage, "alertmanager-default-base-image", "quay.io/prometheus/alertmanager", "Alertmanager default base image")
flagset.StringVar(&cfg.PrometheusDefaultBaseImage, "prometheus-default-base-image", "quay.io/prometheus/prometheus", "Prometheus default base image")


@ -3,7 +3,7 @@ all: build
FLAGS =
ENVVAR = GOOS=linux GOARCH=amd64 CGO_ENABLED=0
REGISTRY = quay.io/coreos
TAG = v0.0.4
TAG = v0.0.5
NAME = grafana-watcher
build:


@ -1,8 +1,6 @@
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/scrape: 'true'
labels:
app: grafana
name: grafana
@ -75,8 +73,7 @@ spec:
memory: 200Mi
cpu: 200m
- name: grafana-watcher
image: quay.io/coreos/grafana-watcher:v0.0.4
imagePullPolicy: Never
image: quay.io/coreos/grafana-watcher:v0.0.5
args:
- '--watch-dir=/var/grafana-dashboards'
- '--grafana-url=http://localhost:3000'
@ -110,4 +107,3 @@ spec:
- name: grafana-dashboards
configMap:
name: grafana-dashboards


@ -82,7 +82,7 @@ func (u *GrafanaDatasourceUpdater) createDatasourcesFromFiles() error {
}
for _, fp := range filePaths {
u.createDatasourceFromFile(fp)
err = u.createDatasourceFromFile(fp)
if err != nil {
return err
}


@ -1,14 +1,14 @@
### Up Alerting ###
Alert TargetDown
IF 100 * (count(up == 0) / count(up)) > 3
IF 100 * (count by(job) (up == 0) / count by(job) (up)) > 10
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Targets are down",
description = "More than {{ $value }}% of targets are down."
description = "{{ $value }}% or more of {{ $labels.job }} targets are down."
}
### Dead man's switch ###


@ -1,5 +1,5 @@
ALERT K8SApiserverDown
IF absent({job="apiserver"}) or (count by(cluster) (up{job="apiserver"} == 1) < count by(cluster) (up{job="apiserver"}))
IF absent(up{job="apiserver"} == 1)
FOR 5m
LABELS {
severity = "critical"


@ -1,5 +1,5 @@
ALERT K8SControllerManagerDown
IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0)
IF absent(up{job="kube-controller-manager"} == 1)
FOR 5m
LABELS {
severity = "critical",
@ -7,4 +7,5 @@ ALERT K8SControllerManagerDown
ANNOTATIONS {
summary = "Controller manager is down",
description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.",
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-controller-manager",
}


@ -1,5 +1,5 @@
ALERT K8SSchedulerDown
IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0)
IF absent(up{job="kube-scheduler"} == 1)
FOR 5m
LABELS {
severity = "critical",
@ -7,4 +7,5 @@ ALERT K8SSchedulerDown
ANNOTATIONS {
summary = "Scheduler is down",
description = "There is no running K8S scheduler. New pods are not being assigned to nodes.",
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-scheduler",
}


@ -11,24 +11,24 @@ ALERT K8SNodeNotReady
ALERT K8SManyNodesNotReady
IF
count by (cluster) (kube_node_status_ready{condition="true"} == 0) > 1
count(kube_node_status_ready{condition="true"} == 0) > 1
AND
(
count by (cluster) (kube_node_status_ready{condition="true"} == 0)
count(kube_node_status_ready{condition="true"} == 0)
/
count by (cluster) (kube_node_status_ready{condition="true"})
count(kube_node_status_ready{condition="true"})
) > 0.2
FOR 1m
LABELS {
severity = "critical",
}
ANNOTATIONS {
summary = "Many K8s nodes are Not Ready",
description = "{{ $value }} K8s nodes (more than 10% of cluster {{ $labels.cluster }}) are in the NotReady state.",
summary = "Many Kubernetes nodes are Not Ready",
description = "{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).",
}
ALERT K8SKubeletDown
IF count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.03
IF count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.03
FOR 1h
LABELS {
severity = "warning",
@ -39,7 +39,7 @@ ALERT K8SKubeletDown
}
ALERT K8SKubeletDown
IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
IF absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.1
FOR 1h
LABELS {
severity = "critical",


@ -1,10 +1,10 @@
ALERT NodeExporterDown
IF up{job="node-exporter"} == 0
IF absent(up{job="node-exporter"} == 1)
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "node-exporter cannot be scraped",
description = "Prometheus could not scrape a node-exporter for more than 10m.",
description = "Prometheus could not scrape a node-exporter for more than 10m, or node-exporters have disappeared from discovery.",
}


@ -41,7 +41,7 @@ spec:
memory: 200Mi
cpu: 200m
- name: grafana-watcher
image: quay.io/coreos/grafana-watcher:v0.0.4
image: quay.io/coreos/grafana-watcher:v0.0.5
args:
- '--watch-dir=/var/grafana-dashboards'
- '--grafana-url=http://localhost:3000'
@ -56,9 +56,6 @@ spec:
secretKeyRef:
name: grafana-credentials
key: password
volumeMounts:
- name: grafana-dashboards
mountPath: /var/grafana-dashboards
resources:
requests:
memory: "16Mi"


@ -7,7 +7,10 @@ rules:
resources:
- nodes
- pods
- services
- resourcequotas
- replicationcontrollers
- limitranges
verbs: ["list", "watch"]
- apiGroups: ["extensions"]
resources:


@ -14,7 +14,7 @@ spec:
serviceAccountName: prometheus-operator
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
args:
- "--kubelet-service=kube-system/kubelet"
- "--config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1"


@ -225,14 +225,14 @@ data:
### Up Alerting ###
Alert TargetDown
IF 100 * (count(up == 0) / count(up)) > 3
IF 100 * (count by(job) (up == 0) / count by(job) (up)) > 10
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Targets are down",
description = "More than {{ $value }}% of targets are down."
description = "{{ $value }}% or more of {{ $labels.job }} targets are down."
}
### Dead man's switch ###
@ -287,7 +287,7 @@ data:
}
kube-apiserver.rules: |+
ALERT K8SApiserverDown
IF absent({job="apiserver"}) or (count by(cluster) (up{job="apiserver"} == 1) < count by(cluster) (up{job="apiserver"}))
IF absent(up{job="apiserver"} == 1)
FOR 5m
LABELS {
severity = "critical"
@ -316,7 +316,7 @@ data:
}
kube-controller-manager.rules: |+
ALERT K8SControllerManagerDown
IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0)
IF absent(up{job="kube-controller-manager"} == 1)
FOR 5m
LABELS {
severity = "critical",
@ -324,6 +324,7 @@ data:
ANNOTATIONS {
summary = "Controller manager is down",
description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.",
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-controller-manager",
}
kubelet.rules: |+
ALERT K8SNodeNotReady
@ -339,24 +340,24 @@ data:
ALERT K8SManyNodesNotReady
IF
count by (cluster) (kube_node_status_ready{condition="true"} == 0) > 1
count(kube_node_status_ready{condition="true"} == 0) > 1
AND
(
count by (cluster) (kube_node_status_ready{condition="true"} == 0)
count(kube_node_status_ready{condition="true"} == 0)
/
count by (cluster) (kube_node_status_ready{condition="true"})
count(kube_node_status_ready{condition="true"})
) > 0.2
FOR 1m
LABELS {
severity = "critical",
}
ANNOTATIONS {
summary = "Many K8s nodes are Not Ready",
description = "{{ $value }} K8s nodes (more than 10% of cluster {{ $labels.cluster }}) are in the NotReady state.",
summary = "Many Kubernetes nodes are Not Ready",
description = "{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).",
}
ALERT K8SKubeletDown
IF count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.03
IF count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.03
FOR 1h
LABELS {
severity = "warning",
@ -367,7 +368,7 @@ data:
}
ALERT K8SKubeletDown
IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
IF absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.1
FOR 1h
LABELS {
severity = "critical",
@ -560,7 +561,7 @@ data:
histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
kube-scheduler.rules: |+
ALERT K8SSchedulerDown
IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0)
IF absent(up{job="kube-scheduler"} == 1)
FOR 5m
LABELS {
severity = "critical",
@ -568,17 +569,18 @@ data:
ANNOTATIONS {
summary = "Scheduler is down",
description = "There is no running K8S scheduler. New pods are not being assigned to nodes.",
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-scheduler",
}
node.rules: |+
ALERT NodeExporterDown
IF up{job="node-exporter"} == 0
IF absent(up{job="node-exporter"} == 1)
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "node-exporter cannot be scraped",
description = "Prometheus could not scrape a node-exporter for more than 10m.",
description = "Prometheus could not scrape a node-exporter for more than 10m, or node-exporters have disappeared from discovery.",
}
prometheus.rules: |+
ALERT FailedReload


@ -9,6 +9,8 @@ spec:
endpoints:
- port: http-metrics
interval: 30s
- port: cadvisor
interval: 30s
honorLabels: true
selector:
matchLabels:


@ -4,7 +4,7 @@ FLAGS =
ENVVAR = GOOS=linux GOARCH=amd64 CGO_ENABLED=0
NAME = prometheus-config-reloader
REPO = quay.io/coreos/$(NAME)
TAG = v0.0.1
TAG = v0.0.2
IMAGE = $(REPO):$(TAG)
build:


@ -30,7 +30,7 @@ spec:
mountPath: /etc/prometheus/rules
readOnly: true
- name: prometheus-config-reloader
image: quay.io/coreos/prometheus-config-reloader:v0.0.1
image: quay.io/coreos/prometheus-config-reloader:v0.0.2
args:
- '-config-volume-dir=/etc/prometheus/config'
- '-rule-volume-dir=/etc/prometheus/rules'


@ -24,9 +24,11 @@ import (
"os"
"path/filepath"
"strings"
"time"
fsnotify "gopkg.in/fsnotify.v1"
"github.com/cenkalti/backoff"
"github.com/ericchiang/k8s"
"github.com/go-kit/kit/log"
)
@ -208,7 +210,9 @@ func (w *volumeWatcher) Refresh() {
}
w.logger.Log("msg", "Reloading Prometheus...")
err = w.ReloadPrometheus()
err = backoff.RetryNotify(w.ReloadPrometheus, backoff.NewExponentialBackOff(), func(err error, next time.Duration) {
w.logger.Log("msg", "Reloading Prometheus temporarily failed.", "err", err, "next-retry", next)
})
if err != nil {
w.logger.Log("msg", "Reloading Prometheus failed.", "err", err)
} else {


@ -13,7 +13,7 @@ spec:
spec:
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
resources:
requests:
cpu: 100m


@ -14,7 +14,7 @@ spec:
serviceAccountName: prometheus-operator
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
resources:
requests:
cpu: 100m


@ -7,4 +7,4 @@ maintainers:
name: alertmanager
sources:
- https://github.com/coreos/prometheus-operator
version: 0.0.1
version: 0.0.3


@ -76,5 +76,6 @@ $ helm install opsgoodness/alertmanager --name my-release -f values.yaml
> **Tip**: You can use the default [values.yaml](values.yaml)
### Third-party Resource Documentation
- [alertmanager](https://github.com/coreos/prometheus-operator/blob/master/Documentation/alertmanager.md)
- [prometheus](https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md)
- [Alertmanager](/Documentation/design.md#alertmanager)
- [Prometheus](/Documentation/design.md#prometheus)
- [ServiceMonitor](/Documentation/design.md#servicemonitor)


@ -17,14 +17,14 @@ spec:
{{- else }}
externalUrl: http://{{ template "fullname" . }}.{{ .Release.Namespace }}:9093
{{- end }}
# {{- if .Values.nodeSelector }}
# nodeSelector:
# {{ toYaml .Values.nodeSelector | indent 4 }}
# {{- end }}
{{- if .Values.nodeSelector }}
nodeSelector:
{{ toYaml .Values.nodeSelector | indent 4 }}
{{- end }}
paused: {{ .Values.paused }}
replicas: {{ .Values.replicaCount }}
# resources:
# {{ toYaml .Values.resources | indent 4 }}
resources:
{{ toYaml .Values.resources | indent 4 }}
{{- if .Values.storageSpec }}
storage:
{{ toYaml .Values.storageSpec | indent 4 }}


@ -25,7 +25,7 @@ externalUrl: ""
##
image:
repository: quay.io/prometheus/alertmanager
tag: v0.5.1
tag: v0.7.1
ingress:
## If true, Alertmanager Ingress will be created
@ -100,8 +100,12 @@ service:
##
type: ClusterIP
## If true, create & use RBAC resources
##
rbacEnable: true
## Alertmanager StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
# class: default


@ -102,7 +102,7 @@ alertmanager:
type: ClusterIP
## Alertmanager StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
# class: default
@ -113,7 +113,7 @@ alertmanager:
prometheus:
## Alertmanagers to which alerts will be sent
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#alertmanagerendpoints
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerendpoints
##
alertingEndpoints: []
# - name: ""
@ -319,7 +319,7 @@ prometheus:
# serverName: ""
## Prometheus StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
# class: default


@ -7,4 +7,4 @@ maintainers:
name: prometheus-operator
sources:
- https://github.com/coreos/prometheus-operator
version: 0.0.5
version: 0.0.6


@ -90,7 +90,7 @@ Parameter | Description | Default
`kubeletService.name` | The name of the kubelet service to be created | `kubelet`
`nodeSelector` | Node labels for pod assignment | `{}`
`prometheusConfigReloader.repository` | prometheus-config-reloader image | `quay.io/coreos/prometheus-config-reloader`
`prometheusConfigReloader.tag` | prometheus-config-reloader tag | `v0.0.1`
`prometheusConfigReloader.tag` | prometheus-config-reloader tag | `v0.0.2`
`rbacEnable` | If true, create & use RBAC resources | `true`
`resources` | Pod resource requests & limits | `{}`
`sendAnalytics` | Collect & send anonymous usage statistics | `true`
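
As a usage sketch for the parameter table above (not part of this diff), the config reloader image could be pinned in a custom values file and passed to helm install with -f; the file name is hypothetical, and the values mirror the chart defaults shown in this commit.

# my-values.yaml (hypothetical override for the prometheus-operator chart)
prometheusConfigReloader:
  repository: quay.io/coreos/prometheus-config-reloader
  tag: v0.0.2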


@ -10,7 +10,7 @@ global:
##
prometheusConfigReloader:
repository: quay.io/coreos/prometheus-config-reloader
tag: v0.0.1
tag: v0.0.2
## Configmap-reload image to use for reloading configmaps
##
@ -22,7 +22,7 @@ configmapReload:
##
image:
repository: quay.io/coreos/prometheus-operator
tag: v0.9.1
tag: v0.10.1
pullPolicy: IfNotPresent
## If enabled, prometheus-operator will create a service for scraping kubelets


@ -7,4 +7,4 @@ maintainers:
name: prometheus
sources:
- https://github.com/coreos/prometheus-operator
version: 0.0.1
version: 0.0.3


@ -44,6 +44,7 @@ Parameter | Description | Default
--- | --- | ---
`alertingEndpoints` | Alertmanagers to which alerts will be sent | `[]`
`config` | Prometheus configuration directives | `{}`
`externalLabels` | The labels to add to any time series or alerts when communicating with external systems | `{}`
`externalUrl` | External URL at which Prometheus will be reachable | `""`
`image.repository` | Image | `quay.io/prometheus/prometheus`
`image.tag` | Image tag | `v1.5.2`
@ -81,5 +82,6 @@ $ helm install opsgoodness/prometheus --name my-release -f values.yaml
> **Tip**: You can use the default [values.yaml](values.yaml)
### Third-party Resource Documentation
- [prometheus](https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md)
- [servicemonitor](https://github.com/coreos/prometheus-operator/blob/master/Documentation/service-monitor.md)
- [Alertmanager](/Documentation/design.md#alertmanager)
- [Prometheus](/Documentation/design.md#prometheus)
- [ServiceMonitor](/Documentation/design.md#servicemonitor)
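
The new externalLabels parameter listed in the table above maps directly onto the chart's values file. Below is a minimal override sketch with made-up label keys and values.

# values override sketch for the prometheus chart; labels are illustrative only
externalLabels:
  cluster: dev-cluster    # hypothetical label
  environment: staging    # hypothetical label
# rbacEnable is also new in this chart and switches on the ClusterRole,
# ClusterRoleBinding and ServiceAccount templates added by this commit.
rbacEnable: true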


@ -0,0 +1,29 @@
{{- if .Values.rbacEnable }}
{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1beta1" }}
apiVersion: rbac.authorization.k8s.io/v1beta1
{{- else if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1alpha1" }}
apiVersion: rbac.authorization.k8s.io/v1alpha1
{{- end }}
kind: ClusterRole
metadata:
labels:
app: {{ template "name" . }}
chart: {{ .Chart.Name }}-{{ .Chart.Version }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "fullname" . }}
rules:
- apiGroups: [""]
resources:
- nodes
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources:
- configmaps
verbs: ["get"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
{{- end }}


@ -0,0 +1,24 @@
{{- if .Values.rbacEnable }}
{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1beta1" }}
apiVersion: rbac.authorization.k8s.io/v1beta1
{{- else if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1alpha1" }}
apiVersion: rbac.authorization.k8s.io/v1alpha1
{{- end }}
kind: ClusterRoleBinding
metadata:
labels:
app: {{ template "name" . }}
chart: {{ .Chart.Name }}-{{ .Chart.Version }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "fullname" . }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: {{ template "fullname" . }}
subjects:
- kind: ServiceAccount
name: {{ template "fullname" . }}
namespace: {{ .Release.Namespace }}
{{- end }}


@ -21,6 +21,10 @@ spec:
port: http
{{- end }}
baseImage: "{{ .Values.image.repository }}"
{{- if .Values.externalLabels }}
externalLabels:
{{ toYaml .Values.externalLabels | indent 4}}
{{- end }}
{{- if .Values.externalUrl }}
externalUrl: "{{ .Values.externalUrl }}"
{{- else if .Values.ingress.fqdn }}
@ -40,6 +44,9 @@ spec:
{{- if .Values.routePrefix }}
routePrefix: "{{ .Values.routePrefix }}"
{{- end }}
{{- if .Values.rbacEnable }}
serviceAccountName: {{ template "fullname" . }}
{{- end }}
{{- if .Values.serviceMonitorsSelector }}
serviceMonitorSelector:
matchLabels:


@ -0,0 +1,11 @@
{{- if .Values.rbacEnable }}
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app: {{ template "name" . }}
chart: {{ .Chart.Name }}-{{ .Chart.Version }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "fullname" . }}
{{- end }}


@ -1,5 +1,5 @@
## Alertmanagers to which alerts will be sent
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#alertmanagerendpoints
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerendpoints
##
alertingEndpoints: []
# - name: ""
@ -15,6 +15,10 @@ config:
specifiedInValues: true
value: {}
## External labels to add to any time series or alerts when communicating with external systems
##
externalLabels: {}
## External URL at which Prometheus will be reachable
##
externalUrl: ""
@ -23,7 +27,7 @@ externalUrl: ""
##
image:
repository: quay.io/prometheus/prometheus
tag: v1.5.2
tag: v1.7.1
ingress:
## If true, Prometheus Ingress will be created
@ -55,6 +59,10 @@ nodeSelector: {}
##
paused: false
## If true, create & use RBAC resources
##
rbacEnable: true
## Number of Prometheus replicas desired
##
replicaCount: 1
@ -205,7 +213,7 @@ serviceMonitors: []
# serverName: ""
## Prometheus StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
# class: default


@ -125,7 +125,7 @@ type PrometheusStatus struct {
UnavailableReplicas int32 `json:"unavailableReplicas"`
}
// AlertingSpec defines paramters for alerting configuration of Prometheus servers.
// AlertingSpec defines parameters for alerting configuration of Prometheus servers.
type AlertingSpec struct {
// AlertmanagerEndpoints Prometheus should fire alerts against.
Alertmanagers []AlertmanagerEndpoints `json:"alertmanagers"`
@ -144,7 +144,7 @@ type StorageSpec struct {
// info: http://kubernetes.io/docs/user-guide/persistent-volumes#resources
// DEPRECATED
Resources v1.ResourceRequirements `json:"resources"`
// Pvc A pvc spec to be used by the Prometheus statefulsets.
// A PVC spec to be used by the Prometheus StatefulSets.
VolumeClaimTemplate v1.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"`
}


@ -35,7 +35,7 @@ import (
// for use.
// TODO(gouthamve): Move to clientset.Get()
func WaitForCRDReady(restClient rest.Interface, crdGroup, crdVersion, crdName string) error {
return wait.Poll(3*time.Second, 30*time.Second, func() (bool, error) {
err := wait.Poll(3*time.Second, 5*time.Minute, func() (bool, error) {
res := restClient.Get().AbsPath("apis", crdGroup, crdVersion, crdName).Do()
err := res.Error()
if err != nil {
@ -57,6 +57,8 @@ func WaitForCRDReady(restClient rest.Interface, crdGroup, crdVersion, crdName st
return true, nil
})
return errors.Wrap(err, fmt.Sprintf("timed out waiting for TPR %s", crdName))
}
// PodRunningAndReady returns whether a pod is running and each container has


@ -357,16 +357,20 @@ func (c *Operator) syncNodeEndpoints() {
},
},
Subsets: []v1.EndpointSubset{
v1.EndpointSubset{
{
Ports: []v1.EndpointPort{
v1.EndpointPort{
{
Name: "https-metrics",
Port: 10250,
},
v1.EndpointPort{
{
Name: "http-metrics",
Port: 10255,
},
{
Name: "cadvisor",
Port: 4194,
},
},
},
},
@ -402,7 +406,7 @@ func (c *Operator) syncNodeEndpoints() {
Type: v1.ServiceTypeClusterIP,
ClusterIP: "None",
Ports: []v1.ServicePort{
v1.ServicePort{
{
Name: "https-metrics",
Port: 10250,
},


@ -53,7 +53,7 @@ func stringMapToMapSlice(m map[string]string) yaml.MapSlice {
func generateConfig(p *v1alpha1.Prometheus, mons map[string]*v1alpha1.ServiceMonitor, ruleConfigMaps int, basicAuthSecrets map[string]BasicAuthCredentials) ([]byte, error) {
versionStr := p.Spec.Version
if versionStr == "" {
versionStr = defaultVersion
versionStr = DefaultVersion
}
version, err := semver.Parse(strings.TrimLeft(versionStr, "v"))
@ -279,13 +279,13 @@ func generateServiceMonitorConfig(version semver.Version, m *v1alpha1.ServiceMon
} else if ep.TargetPort.StrVal != "" {
relabelings = append(relabelings, yaml.MapSlice{
{Key: "action", Value: "keep"},
{Key: "source_labels", Value: []string{"__meta_kubernetes_container_port_name"}},
{Key: "source_labels", Value: []string{"__meta_kubernetes_pod_container_port_name"}},
{Key: "regex", Value: ep.TargetPort.String()},
})
} else if ep.TargetPort.IntVal != 0 {
relabelings = append(relabelings, yaml.MapSlice{
{Key: "action", Value: "keep"},
{Key: "source_labels", Value: []string{"__meta_kubernetes_container_port_number"}},
{Key: "source_labels", Value: []string{"__meta_kubernetes_pod_container_port_number"}},
{Key: "regex", Value: ep.TargetPort.String()},
})
}


@ -37,7 +37,7 @@ import (
const (
governingServiceName = "prometheus-operated"
defaultVersion = "v1.7.0"
DefaultVersion = "v1.7.1"
defaultRetention = "24h"
configMapsFilename = "configmaps.json"
@ -78,7 +78,7 @@ func makeStatefulSet(p v1alpha1.Prometheus, old *v1beta1.StatefulSet, config *Co
p.Spec.BaseImage = config.PrometheusDefaultBaseImage
}
if p.Spec.Version == "" {
p.Spec.Version = defaultVersion
p.Spec.Version = DefaultVersion
}
if p.Spec.Replicas != nil && *p.Spec.Replicas < minReplicas {
p.Spec.Replicas = &minReplicas
@ -123,6 +123,8 @@ func makeStatefulSet(p v1alpha1.Prometheus, old *v1beta1.StatefulSet, config *Co
pvcTemplate := storageSpec.VolumeClaimTemplate
pvcTemplate.Name = volumeName(p.Name)
pvcTemplate.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
pvcTemplate.Spec.Resources = storageSpec.VolumeClaimTemplate.Spec.Resources
pvcTemplate.Spec.Selector = storageSpec.VolumeClaimTemplate.Spec.Selector
statefulset.Spec.VolumeClaimTemplates = append(statefulset.Spec.VolumeClaimTemplates, pvcTemplate)
}


@ -1,38 +0,0 @@
FROM golang:1.8.1-stretch
ENV TERRAFORM_VERSION 0.8.7
ENV KOPS_VERSION 1.5.1
ENV DOCKER_VERSION 1.13.1
RUN echo "deb http://ftp.debian.org/debian wheezy-backports main" >> /etc/apt/sources.list
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
wget \
unzip \
python python-pip jq python-setuptools \
&& rm -rf /var/lib/apt/lists/*
RUN curl https://get.docker.com/builds/Linux/x86_64/docker-${DOCKER_VERSION}.tgz | tar -xvz && \
mv docker/docker /usr/local/bin/docker && \
chmod +x /usr/local/bin/docker && \
rm -r docker
RUN wget -q -O /terraform.zip "https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_amd64.zip" && \
unzip /terraform.zip -d /bin
RUN wget -q -O /kops "https://github.com/kubernetes/kops/releases/download/${KOPS_VERSION}/kops-linux-amd64" && \
chmod +x /kops && \
mv /kops /bin
RUN curl "https://s3.amazonaws.com/aws-cli/awscli-bundle.zip" -o "awscli-bundle.zip" && \
unzip awscli-bundle.zip && \
./awscli-bundle/install -i /usr/local/aws -b /bin/aws && \
rm -r awscli-bundle awscli-bundle.zip
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl && \
chmod +x ./kubectl && \
mv ./kubectl /bin/kubectl
RUN pip install yq


@ -1,55 +0,0 @@
CLUSTER_NAME ?= prom-test-$(shell whoami)
DOMAIN ?= dev.coreos.systems
AMOUNT_NODES = $$(($(shell cat manifests/kops/regular-ig.yaml | yq '.spec.minSize')+1))
path ?= clusters/${CLUSTER_NAME}
build_path := $(path)/.build
aws_region = eu-west-1
KOPS_CMD = kops --state $(shell terraform output -state "$(build_path)/terraform.tfstate" kops_state_bucket)
TERRAFORM_FLAGS = -var "dns_domain=$(DOMAIN)" -var "cluster_name=$(CLUSTER_NAME)" -state "$(build_path)/terraform.tfstate"
all: check-deps gen-ssh cluster wait-for-cluster run-e2e-tests
check-deps:
@which aws || echo "AWS cli is missing."
@which kops || echo "Kops is missing."
@which kubectl || echo "Kubectl is missing."
@which terraform || echo "Terraform is missing."
@which jq || echo "jq is missing."
@which yq || echo "yq is missing."
clean: clean-cluster clean-aws-deps
gen-ssh:
ssh-keygen -t rsa -N "" -f /root/.ssh/id_rsa -q
aws-deps:
AWS_REGION=$(aws_region) terraform apply $(TERRAFORM_FLAGS) ./templates
cluster: aws-deps
$(KOPS_CMD) get cluster | grep -v $(CLUSTER_NAME).$(DOMAIN) || \
$(KOPS_CMD) create cluster \
--name $(CLUSTER_NAME).$(DOMAIN) \
--cloud aws --zones $(aws_region)a --kubernetes-version 1.5.2 \
--master-size t2.medium --yes \
--master-security-groups $(shell terraform output -state "$(build_path)/terraform.tfstate" kops_master_security_group) \
--node-security-groups $(shell terraform output -state "$(build_path)/terraform.tfstate" kops_master_security_group) \
--vpc $(shell terraform output -state "$(build_path)/terraform.tfstate" kops_main_vpc)
EDITOR='./ed.sh manifests/kops/regular-ig.yaml' $(KOPS_CMD) edit ig nodes
$(KOPS_CMD) update cluster --yes
run-e2e-tests:
$(MAKE) -C ../../ e2e-test
wait-for-cluster:
timeout 1800 ./wait-for-cluster.sh $(AMOUNT_NODES)
clean-cluster:
$(KOPS_CMD) delete cluster --name $(CLUSTER_NAME).$(DOMAIN) --yes
clean-aws-deps:
AWS_REGION=$(aws_region) terraform destroy -force $(TERRAFORM_FLAGS) ./templates
rm -f $(build_path)/terraform.tfstate*
.PHONY: all check-deps clean gen-ssh aws-deps cluster run-e2e-tests wait-for-cluster clean-cluster clean-aws-deps


@ -0,0 +1,8 @@
FROM golang:1.8-stretch
ENV DOCKER_VERSION 1.13.1
RUN curl https://get.docker.com/builds/Linux/x86_64/docker-${DOCKER_VERSION}.tgz | tar -xvz && \
mv docker/docker /usr/local/bin/docker && \
chmod +x /usr/local/bin/docker && \
rm -r docker


@ -1,14 +0,0 @@
#!/bin/bash
# Kops requires user input through an editor to update a ressource. Instead of
# interacting with an editor we give Kops a fake editor via the 'EDITOR' env
# var. This editor always writes the content of file '$1' into file '$2'. In the
# Makefile before calling 'kops edit ig nodes' we set the 'EDITOR' env var to
# this script with the wanted file as the first argument. The second argument
# which is the file that is supposed to be edited by the user is passed in by
# kops later.
WANTED_FILE=$1
TO_EDIT_FILE=$2
cat $WANTED_FILE > $TO_EDIT_FILE


@ -0,0 +1,13 @@
FROM golang:1.8-stretch
RUN echo "deb http://ftp.debian.org/debian wheezy-backports main" >> /etc/apt/sources.list
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
unzip \
python python-pip jq \
&& rm -rf /var/lib/apt/lists/*
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl && \
chmod +x ./kubectl && \
mv ./kubectl /bin/kubectl


@ -0,0 +1,230 @@
// The e-mail address used to login as the admin user to the Tectonic Console.
//
// Note: This field MUST be set manually prior to creating the cluster.
tectonic_admin_email = "monitoring@coreos.com"
// The bcrypt hash of admin user password to login to the Tectonic Console.
// Use the bcrypt-hash tool (https://github.com/coreos/bcrypt-tool/releases/tag/v1.0.0) to generate it.
//
// Note: This field MUST be set manually prior to creating the cluster.
tectonic_admin_password_hash = ""
// (optional) Extra AWS tags to be applied to created autoscaling group resources.
// This is a list of maps having the keys `key`, `value` and `propagate_at_launch`.
//
// Example: `[ { key = "foo", value = "bar", propagate_at_launch = true } ]`
// tectonic_autoscaling_group_extra_tags = ""
// Number of Availability Zones your EC2 instances will be deployed across.
// This should be less than or equal to the total number available in the region.
// Be aware that some regions only have 2.
// If set worker and master subnet CIDRs are calculated automatically.
//
// Note:
// This field MUST be set manually prior to creating the cluster.
// It MUST NOT be set if availability zones CIDRs are configured using `tectonic_aws_master_custom_subnets` and `tectonic_aws_worker_custom_subnets`.
tectonic_aws_az_count = "2"
// Instance size for the etcd node(s). Example: `t2.medium`.
tectonic_aws_etcd_ec2_type = "t2.medium"
// The amount of provisioned IOPS for the root block device of etcd nodes.
tectonic_aws_etcd_root_volume_iops = "100"
// The size of the volume in gigabytes for the root block device of etcd nodes.
tectonic_aws_etcd_root_volume_size = "30"
// The type of volume for the root block device of etcd nodes.
tectonic_aws_etcd_root_volume_type = "gp2"
// (optional) List of subnet IDs within an existing VPC to deploy master nodes into.
// Required to use an existing VPC and the list must match the AZ count.
//
// Example: `["subnet-111111", "subnet-222222", "subnet-333333"]`
// tectonic_aws_external_master_subnet_ids = ""
// (optional) ID of an existing VPC to launch nodes into.
// If unset a new VPC is created.
//
// Example: `vpc-123456`
// tectonic_aws_external_vpc_id = ""
// If set to true, create public facing ingress resources (ELB, A-records).
// If set to false, a "private" cluster will be created with an internal ELB only.
tectonic_aws_external_vpc_public = true
// (optional) List of subnet IDs within an existing VPC to deploy worker nodes into.
// Required to use an existing VPC and the list must match the AZ count.
//
// Example: `["subnet-111111", "subnet-222222", "subnet-333333"]`
// tectonic_aws_external_worker_subnet_ids = ""
// (optional) Extra AWS tags to be applied to created resources.
// tectonic_aws_extra_tags = ""
// (optional) This configures master availability zones and their corresponding subnet CIDRs directly.
//
// Example:
// `{ eu-west-1a = "10.0.0.0/20", eu-west-1b = "10.0.16.0/20" }`
//
// Note that `tectonic_aws_az_count` must be unset if this is specified.
// tectonic_aws_master_custom_subnets = ""
// Instance size for the master node(s). Example: `t2.medium`.
tectonic_aws_master_ec2_type = "t2.medium"
// The amount of provisioned IOPS for the root block device of master nodes.
tectonic_aws_master_root_volume_iops = "100"
// The size of the volume in gigabytes for the root block device of master nodes.
tectonic_aws_master_root_volume_size = "30"
// The type of volume for the root block device of master nodes.
tectonic_aws_master_root_volume_type = "gp2"
// The target AWS region for the cluster.
tectonic_aws_region = "eu-west-2"
// Name of an SSH key located within the AWS region. Example: coreos-user.
tectonic_aws_ssh_key = "jenkins-tpo-ssh-key"
// Block of IP addresses used by the VPC.
// This should not overlap with any other networks, such as a private datacenter connected via Direct Connect.
tectonic_aws_vpc_cidr_block = "10.0.0.0/16"
// (optional) This configures worker availability zones and their corresponding subnet CIDRs directly.
//
// Example: `{ eu-west-1a = "10.0.64.0/20", eu-west-1b = "10.0.80.0/20" }`
//
// Note that `tectonic_aws_az_count` must be unset if this is specified.
// tectonic_aws_worker_custom_subnets = ""
// Instance size for the worker node(s). Example: `t2.medium`.
tectonic_aws_worker_ec2_type = "t2.medium"
// The amount of provisioned IOPS for the root block device of worker nodes.
tectonic_aws_worker_root_volume_iops = "100"
// The size of the volume in gigabytes for the root block device of worker nodes.
tectonic_aws_worker_root_volume_size = "30"
// The type of volume for the root block device of worker nodes.
tectonic_aws_worker_root_volume_type = "gp2"
// The base DNS domain of the cluster.
//
// Example: `openstack.dev.coreos.systems`.
//
// Note: This field MUST be set manually prior to creating the cluster.
// This applies only to cloud platforms.
tectonic_base_domain = "dev.coreos.systems"
// (optional) The content of the PEM-encoded CA certificate, used to generate Tectonic Console's server certificate.
// If left blank, a CA certificate will be automatically generated.
// tectonic_ca_cert = ""
// (optional) The content of the PEM-encoded CA key, used to generate Tectonic Console's server certificate.
// This field is mandatory if `tectonic_ca_cert` is set.
// tectonic_ca_key = ""
// (optional) The algorithm used to generate tectonic_ca_key.
// The default value is currently recommend.
// This field is mandatory if `tectonic_ca_cert` is set.
// tectonic_ca_key_alg = "RSA"
// The Container Linux update channel.
//
// Examples: `stable`, `beta`, `alpha`
tectonic_cl_channel = "stable"
// This declares the IP range to assign Kubernetes pod IPs in CIDR notation.
tectonic_cluster_cidr = "10.2.0.0/16"
// The name of the cluster.
// If used in a cloud-environment, this will be prepended to `tectonic_base_domain` resulting in the URL to the Tectonic console.
//
// Note: This field MUST be set manually prior to creating the cluster.
// Set via env variable
//tectonic_cluster_name = ""
// (optional) DNS prefix used to construct the console and API server endpoints.
// tectonic_dns_name = ""
// (optional) The path of the file containing the CA certificate for TLS communication with etcd.
//
// Note: This works only when used in conjunction with an external etcd cluster.
// If set, the variables `tectonic_etcd_servers`, `tectonic_etcd_client_cert_path`, and `tectonic_etcd_client_key_path` must also be set.
// tectonic_etcd_ca_cert_path = ""
// (optional) The path of the file containing the client certificate for TLS communication with etcd.
//
// Note: This works only when used in conjunction with an external etcd cluster.
// If set, the variables `tectonic_etcd_servers`, `tectonic_etcd_ca_cert_path`, and `tectonic_etcd_client_key_path` must also be set.
// tectonic_etcd_client_cert_path = ""
// (optional) The path of the file containing the client key for TLS communication with etcd.
//
// Note: This works only when used in conjunction with an external etcd cluster.
// If set, the variables `tectonic_etcd_servers`, `tectonic_etcd_ca_cert_path`, and `tectonic_etcd_client_cert_path` must also be set.
// tectonic_etcd_client_key_path = ""
// The number of etcd nodes to be created.
// If set to zero, the count of etcd nodes will be determined automatically.
//
// Note: This is currently only supported on AWS.
tectonic_etcd_count = "0"
// (optional) List of external etcd v3 servers to connect with (hostnames/IPs only).
// Needs to be set if using an external etcd cluster.
//
// Example: `["etcd1", "etcd2", "etcd3"]`
// tectonic_etcd_servers = ""
// If set to true, experimental Tectonic assets are being deployed.
tectonic_experimental = false
// The Kubernetes service IP used to reach kube-apiserver inside the cluster
// as returned by `kubectl -n default get service kubernetes`.
tectonic_kube_apiserver_service_ip = "10.3.0.1"
// The Kubernetes service IP used to reach kube-dns inside the cluster
// as returned by `kubectl -n kube-system get service kube-dns`.
tectonic_kube_dns_service_ip = "10.3.0.10"
// The Kubernetes service IP used to reach self-hosted etcd inside the cluster
// as returned by `kubectl -n kube-system get service etcd-service`.
tectonic_kube_etcd_service_ip = "10.3.0.15"
// The path to the tectonic licence file.
//
// Note: This field MUST be set manually prior to creating the cluster.
tectonic_license_path = "/go/src/github.com/coreos/tectonic-installer/license"
// The number of master nodes to be created.
// This applies only to cloud platforms.
tectonic_master_count = "1"
// The path the pull secret file in JSON format.
//
// Note: This field MUST be set manually prior to creating the cluster.
tectonic_pull_secret_path = "/go/src/github.com/coreos/tectonic-installer/secret"
// This declares the IP range to assign Kubernetes service cluster IPs in CIDR notation.
tectonic_service_cidr = "10.3.0.0/16"
// If set to true, a vanilla Kubernetes cluster will be deployed, omitting any Tectonic assets.
tectonic_vanilla_k8s = true
// The number of worker nodes to be created.
// This applies only to cloud platforms.
tectonic_worker_count = "3"
tectonic_autoscaling_group_extra_tags = [
{ key = "createdBy", value = "team-monitoring@coreos.com", propagate_at_launch = true },
{ key = "expirationDate", value = "2017-01-01", propagate_at_launch = true }
]
tectonic_aws_extra_tags = {
"createdBy"="team-monitoring@coreos.com",
"expirationDate"="2017-01-01"
}


@ -1,14 +0,0 @@
apiVersion: kops/v1alpha2
kind: InstanceGroup
metadata:
name: nodes
spec:
associatePublicIp: true
machineType: t2.medium
maxSize: 2
minSize: 2
nodeLabels:
isolation: none
role: Node
zones:
- eu-west-1a


@ -8,14 +8,26 @@ set -u
# print each command before executing it
set -x
PO_GOPATH=/go/src/github.com/coreos/prometheus-operator
export {TF_GET_OPTIONS,TF_PLAN_OPTIONS,TF_APPLY_OPTIONS,TF_DESTROY_OPTIONS}="-no-color"
CLUSTER="po-$(git rev-parse --short HEAD)-${BUILD_ID}"
TF_VAR_tectonic_cluster_name="${CLUSTER}"
TF_VAR_tectonic_dns_name="${CLUSTER}"
TECTONIC_INSTALLER_DIR=/go/src/github.com/coreos/tectonic-installer
docker run \
--rm \
-e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY \
-v $PWD:$PO_GOPATH \
-w $PO_GOPATH/scripts/jenkins \
cluster-setup-env \
/bin/bash -c "make clean"
-v $PWD/build/:$TECTONIC_INSTALLER_DIR/build/ \
-v ~/.ssh:$HOME/.ssh \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e TF_GET_OPTIONS \
-e TF_DESTROY_OPTIONS \
-e CLUSTER=${CLUSTER} \
-w $TECTONIC_INSTALLER_DIR \
-e TF_VAR_tectonic_cluster_name=${TF_VAR_tectonic_cluster_name} \
-e TF_VAR_tectonic_dns_name=${TF_VAR_tectonic_dns_name} \
quay.io/coreos/tectonic-installer:master \
/bin/bash -c "make destroy || make destroy || make destroy"
docker rmi quay.io/coreos/prometheus-operator-dev:$BUILD_ID


@ -8,25 +8,79 @@ set -u
# print each command before executing it
set -x
# Push docker image
DOCKER_SOCKET=/var/run/docker.sock
PO_QUAY_REPO=quay.io/coreos/prometheus-operator-dev
docker build -t cluster-setup-env scripts/jenkins/.
docker build -t docker-golang-env -f scripts/jenkins/docker-golang-env/Dockerfile .
docker run \
--rm \
-v $PWD:$PWD -v $DOCKER_SOCKET:$DOCKER_SOCKET \
cluster-setup-env \
docker-golang-env \
/bin/bash -c "cd $PWD && make crossbuild"
docker build -t $PO_QUAY_REPO:$BUILD_ID .
docker login -u="$QUAY_ROBOT_USERNAME" -p="$QUAY_ROBOT_SECRET" quay.io
docker push $PO_QUAY_REPO:$BUILD_ID
# Bring up k8s cluster
export {TF_GET_OPTIONS,TF_PLAN_OPTIONS,TF_APPLY_OPTIONS,TF_DESTROY_OPTIONS}="-no-color"
CLUSTER="po-$(git rev-parse --short HEAD)-${BUILD_ID}"
TF_VAR_tectonic_cluster_name="${CLUSTER}"
TF_VAR_tectonic_dns_name="${CLUSTER}"
TECTONIC_INSTALLER_DIR=/go/src/github.com/coreos/tectonic-installer
PO_DIR=/go/src/github.com/coreos/prometheus-operator
KUBECONFIG="${PO_DIR}/build/${CLUSTER}/generated/auth/kubeconfig"
mkdir -p build/${CLUSTER}
cp ${WORKSPACE}/scripts/jenkins/kubernetes-vanilla.tfvars build/${CLUSTER}/terraform.tfvars
docker run \
--rm \
-e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY \
-e REPO=$PO_QUAY_REPO -e TAG=$BUILD_ID \
-v $PWD:/go/src/github.com/coreos/prometheus-operator \
-w /go/src/github.com/coreos/prometheus-operator/scripts/jenkins \
cluster-setup-env \
/bin/bash -c "make"
-v $PWD/build/:$TECTONIC_INSTALLER_DIR/build/ \
-v ~/.ssh:$HOME/.ssh \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e TF_GET_OPTIONS \
-e TF_PLAN_OPTIONS \
-e TF_APPLY_OPTIONS \
-e CLUSTER=${CLUSTER} \
-e TF_VAR_tectonic_cluster_name=${TF_VAR_tectonic_cluster_name} \
-e TF_VAR_tectonic_dns_name=${TF_VAR_tectonic_dns_name} \
-w $TECTONIC_INSTALLER_DIR \
quay.io/coreos/tectonic-installer:master \
/bin/bash -c "touch license secret && make plan && make apply"
docker build \
-t kubectl-env \
-f scripts/jenkins/kubectl-env/Dockerfile \
.
sleep 5m
docker run \
--rm \
-v $PWD:$PO_DIR \
-w $PO_DIR \
-e KUBECONFIG=${KUBECONFIG} \
kubectl-env \
/bin/bash -c "timeout 900 ./scripts/jenkins/wait-for-cluster.sh 4"
# Run e2e tests
docker run \
--rm \
-v $PWD:$PO_DIR \
-w $PO_DIR \
-e KUBECONFIG=${KUBECONFIG} \
-e REPO=$PO_QUAY_REPO \
-e TAG=$BUILD_ID \
kubectl-env \
/bin/bash -c "make e2e-test"


@ -1,56 +0,0 @@
variable "dns_domain" {}
variable "cluster_name" {}
data "aws_route53_zone" "monitoring_zone" {
name = "${var.dns_domain}"
}
resource "aws_route53_zone" "cluster_zone" {
name = "${var.cluster_name}.${var.dns_domain}"
}
resource "aws_route53_record" "cluster_zone_record" {
name = "${var.cluster_name}.${var.dns_domain}"
zone_id = "${data.aws_route53_zone.monitoring_zone.zone_id}"
type = "NS"
ttl = "300"
records = ["${aws_route53_zone.cluster_zone.name_servers}"]
}
resource "aws_s3_bucket" "kops-state" {
bucket = "kops-${sha1("${var.cluster_name}-${var.dns_domain}")}"
}
resource "aws_security_group" "allow_all" {
name = "allow_all"
description = "Allow all inbound traffic"
vpc_id = "${aws_vpc.main.id}"
ingress {
from_port = 30000
to_port = 32767
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
tags {
Name = "allow_all"
}
}
resource "aws_vpc" "main" {
cidr_block = "172.20.0.0/16"
}
resource "aws_internet_gateway" "gw" {
vpc_id = "${aws_vpc.main.id}"
}


@ -1,11 +0,0 @@
output "kops_state_bucket" {
value = "s3://${aws_s3_bucket.kops-state.id}"
}
output "kops_master_security_group" {
value = "${aws_security_group.allow_all.id}"
}
output "kops_main_vpc" {
value = "${aws_vpc.main.id}"
}


@ -16,10 +16,8 @@ package e2e
import (
"fmt"
"net/http"
"strconv"
"testing"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/pkg/api/v1"
@ -34,6 +32,7 @@ func TestAlertmanagerCreateDeleteCluster(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -52,6 +51,7 @@ func TestAlertmanagerScaling(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -74,6 +74,7 @@ func TestAlertmanagerVersionMigration(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -94,38 +95,13 @@ func TestAlertmanagerVersionMigration(t *testing.T) {
}
}
func TestExposingAlertmanagerWithNodePort(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
alertmanager := framework.MakeBasicAlertmanager("test-alertmanager", 1)
alertmanagerService := framework.MakeAlertmanagerNodePortService(alertmanager.Name, "nodeport-service", 30903)
if err := framework.CreateAlertmanagerAndWaitUntilReady(ns, alertmanager); err != nil {
t.Fatal(err)
}
if finalizerFn, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, alertmanagerService); err != nil {
t.Fatal(err)
} else {
ctx.AddFinalizerFn(finalizerFn)
}
resp, err := http.Get(fmt.Sprintf("http://%s:30903/", framework.ClusterIP))
if err != nil {
t.Fatal("Retrieving alertmanager landing page failed with error: ", err)
} else if resp.StatusCode != 200 {
t.Fatal("Retrieving alertmanager landing page failed with http status code: ", resp.StatusCode)
}
}
func TestExposingAlertmanagerWithKubernetesAPI(t *testing.T) {
t.Parallel()
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
alertmanager := framework.MakeBasicAlertmanager("test-alertmanager", 1)
alertmanagerService := framework.MakeAlertmanagerService(alertmanager.Name, "alertmanager-service", v1.ServiceTypeClusterIP)
@ -146,50 +122,13 @@ func TestExposingAlertmanagerWithKubernetesAPI(t *testing.T) {
}
}
func TestExposingAlertmanagerWithIngress(t *testing.T) {
t.Parallel()
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
alertmanager := framework.MakeBasicAlertmanager("main", 1)
alertmanagerService := framework.MakeAlertmanagerService(alertmanager.Name, "test-group", v1.ServiceTypeClusterIP)
ingress := testFramework.MakeBasicIngress(alertmanagerService.Name, 9093)
if err := testFramework.SetupNginxIngressControllerIncDefaultBackend(framework.KubeClient, ns); err != nil {
t.Fatal(err)
}
if err := framework.CreateAlertmanagerAndWaitUntilReady(ns, alertmanager); err != nil {
t.Fatal(err)
}
if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, alertmanagerService); err != nil {
t.Fatal(err)
}
if err := testFramework.CreateIngress(framework.KubeClient, ns, ingress); err != nil {
t.Fatal(err)
}
ip, err := testFramework.GetIngressIP(framework.KubeClient, ns, ingress.Name)
if err != nil {
t.Fatal(err)
}
err = testFramework.WaitForHTTPSuccessStatusCode(time.Minute, fmt.Sprintf("http://%s/metrics", *ip))
if err != nil {
t.Fatal(err)
}
}
func TestMeshInitialization(t *testing.T) {
t.Parallel()
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
var amountAlertmanagers int32 = 3
alertmanager := &v1alpha1.Alertmanager{
@ -226,6 +165,7 @@ func TestAlertmanagerReloadConfig(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
alertmanager := framework.MakeBasicAlertmanager("reload-config", 1)


@ -56,12 +56,6 @@ func (f *Framework) MakeBasicAlertmanager(name string, replicas int32) *v1alpha1
}
}
func (f *Framework) MakeAlertmanagerNodePortService(name, group string, nodePort int32) *v1.Service {
aMService := f.MakeAlertmanagerService(name, group, v1.ServiceTypeNodePort)
aMService.Spec.Ports[0].NodePort = nodePort
return aMService
}
func (f *Framework) MakeAlertmanagerService(name, group string, serviceType v1.ServiceType) *v1.Service {
service := &v1.Service{
ObjectMeta: metav1.ObjectMeta{


@ -21,29 +21,32 @@ import (
rbacv1alpha1 "k8s.io/client-go/pkg/apis/rbac/v1alpha1"
)
func CreateClusterRoleBinding(kubeClient kubernetes.Interface, relativePath string) error {
func CreateClusterRoleBinding(kubeClient kubernetes.Interface, ns string, relativePath string) (finalizerFn, error) {
finalizerFn := func() error { return DeleteClusterRoleBinding(kubeClient, relativePath) }
clusterRoleBinding, err := parseClusterRoleBindingYaml(relativePath)
if err != nil {
return err
return finalizerFn, err
}
clusterRoleBinding.Subjects[0].Namespace = ns
_, err = kubeClient.RbacV1alpha1().ClusterRoleBindings().Get(clusterRoleBinding.Name, metav1.GetOptions{})
if err == nil {
// ClusterRoleBinding already exists -> Update
_, err = kubeClient.RbacV1alpha1().ClusterRoleBindings().Update(clusterRoleBinding)
if err != nil {
return err
return finalizerFn, err
}
} else {
// ClusterRoleBinding doesn't exist -> Create
_, err = kubeClient.RbacV1alpha1().ClusterRoleBindings().Create(clusterRoleBinding)
if err != nil {
return err
return finalizerFn, err
}
}
return nil
return finalizerFn, err
}
func DeleteClusterRoleBinding(kubeClient kubernetes.Interface, relativePath string) error {

View file

@ -15,6 +15,8 @@
package framework
import (
"fmt"
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/kubernetes"
@ -28,7 +30,7 @@ func MakeDeployment(pathToYaml string) (*v1beta1.Deployment, error) {
}
tectonicPromOp := v1beta1.Deployment{}
if err := yaml.NewYAMLOrJSONDecoder(manifest, 100).Decode(&tectonicPromOp); err != nil {
return nil, err
return nil, errors.Wrap(err, fmt.Sprintf("failed to decode file %s", pathToYaml))
}
return &tectonicPromOp, nil
@ -37,7 +39,7 @@ func MakeDeployment(pathToYaml string) (*v1beta1.Deployment, error) {
func CreateDeployment(kubeClient kubernetes.Interface, namespace string, d *v1beta1.Deployment) error {
_, err := kubeClient.Extensions().Deployments(namespace).Create(d)
if err != nil {
return err
return errors.Wrap(err, fmt.Sprintf("failed to create deployment %s", d.Name))
}
return nil
}

View file

@ -16,6 +16,7 @@ package framework
import (
"net/http"
"testing"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -37,30 +38,29 @@ type Framework struct {
MasterHost string
Namespace *v1.Namespace
OperatorPod *v1.Pod
ClusterIP string
DefaultTimeout time.Duration
}
// New sets up a test framework and returns it.
func New(ns, kubeconfig, opImage, ip string) (*Framework, error) {
func New(ns, kubeconfig, opImage string) (*Framework, error) {
config, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
if err != nil {
return nil, err
return nil, errors.Wrap(err, "build config from flags failed")
}
cli, err := kubernetes.NewForConfig(config)
if err != nil {
return nil, err
return nil, errors.Wrap(err, "creating new kube-client failed")
}
httpc := cli.CoreV1().RESTClient().(*rest.RESTClient).Client
if err != nil {
return nil, err
return nil, errors.Wrap(err, "creating http-client failed")
}
mclient, err := v1alpha1.NewForConfig(config)
if err != nil {
return nil, err
return nil, errors.Wrap(err, "creating monitoring client failed")
}
namespace, err := CreateNamespace(cli, ns)
@ -74,13 +74,12 @@ func New(ns, kubeconfig, opImage, ip string) (*Framework, error) {
MonClient: mclient,
HTTPClient: httpc,
Namespace: namespace,
ClusterIP: ip,
DefaultTimeout: time.Minute,
}
err = f.Setup(opImage)
if err != nil {
return nil, err
return nil, errors.Wrap(err, "setup test environment failed")
}
return f, nil
@ -88,13 +87,29 @@ func New(ns, kubeconfig, opImage, ip string) (*Framework, error) {
func (f *Framework) Setup(opImage string) error {
if err := f.setupPrometheusOperator(opImage); err != nil {
return err
return errors.Wrap(err, "setup prometheus operator failed")
}
return nil
}
func (f *Framework) setupPrometheusOperator(opImage string) error {
deploy, err := MakeDeployment("../../example/non-rbac/prometheus-operator.yaml")
if _, err := CreateServiceAccount(f.KubeClient, f.Namespace.Name, "../../example/rbac/prometheus-operator/prometheus-operator-service-account.yaml"); err != nil {
return errors.Wrap(err, "failed to create prometheus operator service account")
}
if err := CreateClusterRole(f.KubeClient, "../../example/rbac/prometheus-operator/prometheus-operator-cluster-role.yaml"); err != nil {
return errors.Wrap(err, "failed to create prometheus cluster role")
}
if _, err := CreateClusterRoleBinding(f.KubeClient, f.Namespace.Name, "../../example/rbac/prometheus-operator/prometheus-operator-cluster-role-binding.yaml"); err != nil {
return errors.Wrap(err, "failed to create prometheus cluster role binding")
}
if err := CreateClusterRole(f.KubeClient, "../../example/rbac/prometheus/prometheus-cluster-role.yaml"); err != nil {
return errors.Wrap(err, "failed to create prometheus cluster role")
}
deploy, err := MakeDeployment("../../example/rbac/prometheus-operator/prometheus-operator.yaml")
if err != nil {
return err
}
@ -134,6 +149,20 @@ func (f *Framework) setupPrometheusOperator(opImage string) error {
return k8sutil.WaitForCRDReady(f.KubeClient.Core().RESTClient(), v1alpha1.Group, v1alpha1.Version, v1alpha1.AlertmanagerName)
}
func (ctx *TestCtx) SetupPrometheusRBAC(t *testing.T, ns string, kubeClient kubernetes.Interface) {
if finalizerFn, err := CreateServiceAccount(kubeClient, ns, "../../example/rbac/prometheus/prometheus-service-account.yaml"); err != nil {
t.Fatal(errors.Wrap(err, "failed to create prometheus service account"))
} else {
ctx.AddFinalizerFn(finalizerFn)
}
if finalizerFn, err := CreateRoleBinding(kubeClient, ns, "framework/ressources/prometheus-role-binding.yml"); err != nil {
t.Fatal(errors.Wrap(err, "failed to create prometheus role binding"))
} else {
ctx.AddFinalizerFn(finalizerFn)
}
}
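// Illustrative sketch (hypothetical helper, not taken from the operator's test
// framework): new helpers are expected to follow the same convention as the
// calls above and return a finalizerFn (assumed here to be the framework's
// `func() error` type) so callers can register cleanup via ctx.AddFinalizerFn.
// The ConfigMap resource and helper name below are assumptions.
func CreateExampleConfigMap(kubeClient kubernetes.Interface, ns, name string) (finalizerFn, error) {
	finalizer := func() error {
		// delete the resource again when the test context is cleaned up
		return kubeClient.CoreV1().ConfigMaps(ns).Delete(name, nil)
	}
	_, err := kubeClient.CoreV1().ConfigMaps(ns).Create(&v1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{Name: name},
	})
	return finalizer, err
}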
// Teardown tears down a previously initialized test environment.
func (f *Framework) Teardown() error {
if err := f.KubeClient.Core().Services(f.Namespace.Name).Delete("prometheus-operated", nil); err != nil && !k8sutil.IsResourceNotFoundError(err) {

View file

@ -34,12 +34,12 @@ import (
func PathToOSFile(relativPath string) (*os.File, error) {
path, err := filepath.Abs(relativPath)
if err != nil {
return nil, err
return nil, errors.Wrap(err, fmt.Sprintf("failed to generate absolute file path of %s", relativPath))
}
manifest, err := os.Open(path)
if err != nil {
return nil, err
return nil, errors.Wrap(err, fmt.Sprintf("failed to open file %s", path))
}
return manifest, nil

View file

@ -17,8 +17,8 @@ package framework
import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"time"
"k8s.io/apimachinery/pkg/api/resource"
@ -40,11 +40,13 @@ func (f *Framework) MakeBasicPrometheus(ns, name, group string, replicas int32)
},
Spec: v1alpha1.PrometheusSpec{
Replicas: &replicas,
Version: prometheus.DefaultVersion,
ServiceMonitorSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"group": group,
},
},
ServiceAccountName: "prometheus",
RuleSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"role": "rulefile",
@ -95,12 +97,6 @@ func (f *Framework) MakeBasicServiceMonitor(name string) *v1alpha1.ServiceMonito
}
}
func (f *Framework) MakeBasicPrometheusNodePortService(name, group string, nodePort int32) *v1.Service {
pService := f.MakePrometheusService(name, group, v1.ServiceTypeNodePort)
pService.Spec.Ports[0].NodePort = nodePort
return pService
}
func (f *Framework) MakePrometheusService(name, group string, serviceType v1.ServiceType) *v1.Service {
service := &v1.Service{
ObjectMeta: metav1.ObjectMeta{
@ -202,12 +198,12 @@ func promImage(version string) string {
return fmt.Sprintf("quay.io/prometheus/prometheus:%s", version)
}
func (f *Framework) WaitForTargets(amount int) error {
func (f *Framework) WaitForTargets(ns, svcName string, amount int) error {
var targets []*Target
if err := wait.Poll(time.Second, time.Minute*10, func() (bool, error) {
var err error
targets, err = f.GetActiveTargets()
targets, err = f.GetActiveTargets(ns, svcName)
if err != nil {
return false, err
}
@ -224,15 +220,20 @@ func (f *Framework) WaitForTargets(amount int) error {
return nil
}
func (f *Framework) GetActiveTargets() ([]*Target, error) {
resp, err := http.Get(fmt.Sprintf("http://%s:30900/api/v1/targets", f.ClusterIP))
func (f *Framework) QueryPrometheusSVC(ns, svcName, endpoint string, query map[string]string) (io.ReadCloser, error) {
ProxyGet := f.KubeClient.CoreV1().Services(ns).ProxyGet
request := ProxyGet("", svcName, "web", endpoint, query)
return request.Stream()
}
func (f *Framework) GetActiveTargets(ns, svcName string) ([]*Target, error) {
response, err := f.QueryPrometheusSVC(ns, svcName, "/api/v1/targets", map[string]string{})
if err != nil {
return nil, err
}
defer resp.Body.Close()
rt := prometheusTargetAPIResponse{}
if err := json.NewDecoder(resp.Body).Decode(&rt); err != nil {
if err := json.NewDecoder(response).Decode(&rt); err != nil {
return nil, err
}
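// Usage sketch (assumptions: a Prometheus Service named svcName exists in ns
// and exposes a port named "web", as QueryPrometheusSVC above requires):
//
//	body, err := f.QueryPrometheusSVC(ns, svcName, "/api/v1/query", map[string]string{"query": "up"})
//	if err != nil {
//		// handle error
//	}
//	defer body.Close()
//	// body is an io.ReadCloser; decode it, e.g. json.NewDecoder(body).Decode(&result)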

View file

@ -0,0 +1,11 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
name: prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: prometheus

View file

@ -0,0 +1,60 @@
// Copyright 2017 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package framework
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/kubernetes"
rbacv1alpha1 "k8s.io/client-go/pkg/apis/rbac/v1alpha1"
)
func CreateRoleBinding(kubeClient kubernetes.Interface, ns string, relativePath string) (finalizerFn, error) {
finalizerFn := func() error { return DeleteRoleBinding(kubeClient, ns, relativePath) }
roleBinding, err := parseRoleBindingYaml(relativePath)
if err != nil {
return finalizerFn, err
}
_, err = kubeClient.RbacV1alpha1().RoleBindings(ns).Create(roleBinding)
return finalizerFn, err
}
func DeleteRoleBinding(kubeClient kubernetes.Interface, ns string, relativePath string) error {
roleBinding, err := parseRoleBindingYaml(relativePath)
if err != nil {
return err
}
if err := kubeClient.RbacV1alpha1().RoleBindings(ns).Delete(roleBinding.Name, &metav1.DeleteOptions{}); err != nil {
return err
}
return nil
}
func parseRoleBindingYaml(relativePath string) (*rbacv1alpha1.RoleBinding, error) {
manifest, err := PathToOSFile(relativePath)
if err != nil {
return nil, err
}
roleBinding := rbacv1alpha1.RoleBinding{}
if err := yaml.NewYAMLOrJSONDecoder(manifest, 100).Decode(&roleBinding); err != nil {
return nil, err
}
return &roleBinding, nil
}

View file

@ -20,21 +20,40 @@ import (
"k8s.io/client-go/pkg/api/v1"
)
func CreateServiceAccount(kubeClient kubernetes.Interface, namespace string, relativPath string) error {
func CreateServiceAccount(kubeClient kubernetes.Interface, namespace string, relativPath string) (finalizerFn, error) {
finalizerFn := func() error { return DeleteServiceAccount(kubeClient, namespace, relativPath) }
serviceAccount, err := parseServiceAccountYaml(relativPath)
if err != nil {
return finalizerFn, err
}
_, err = kubeClient.CoreV1().ServiceAccounts(namespace).Create(serviceAccount)
if err != nil {
return finalizerFn, err
}
return finalizerFn, nil
}
func parseServiceAccountYaml(relativPath string) (*v1.ServiceAccount, error) {
manifest, err := PathToOSFile(relativPath)
if err != nil {
return err
return nil, err
}
serviceAccount := v1.ServiceAccount{}
if err := yaml.NewYAMLOrJSONDecoder(manifest, 100).Decode(&serviceAccount); err != nil {
return err
return nil, err
}
_, err = kubeClient.CoreV1().ServiceAccounts(namespace).Create(&serviceAccount)
return &serviceAccount, nil
}
func DeleteServiceAccount(kubeClient kubernetes.Interface, namespace string, relativPath string) error {
serviceAccount, err := parseServiceAccountYaml(relativPath)
if err != nil {
return err
}
return nil
return kubeClient.CoreV1().ServiceAccounts(namespace).Delete(serviceAccount.Name, nil)
}

View file

@ -32,7 +32,6 @@ func TestMain(m *testing.M) {
kubeconfig := flag.String("kubeconfig", "", "kube config path, e.g. $HOME/.kube/config")
opImage := flag.String("operator-image", "", "operator image, e.g. quay.io/coreos/prometheus-operator")
ns := flag.String("namespace", "prometheus-operator-e2e-tests", "e2e test namespace")
ip := flag.String("cluster-ip", "", "ip of the kubernetes cluster to use for external requests")
flag.Parse()
var (
@ -40,7 +39,7 @@ func TestMain(m *testing.M) {
code int = 0
)
if framework, err = operatorFramework.New(*ns, *kubeconfig, *opImage, *ip); err != nil {
if framework, err = operatorFramework.New(*ns, *kubeconfig, *opImage); err != nil {
log.Printf("failed to setup framework: %v\n", err)
os.Exit(1)
}

View file

@ -18,7 +18,6 @@ import (
"encoding/json"
"fmt"
"log"
"net/http"
"reflect"
"sort"
"strings"
@ -27,6 +26,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/pkg/api/v1"
@ -43,6 +43,7 @@ func TestPrometheusCreateDeleteCluster(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -64,6 +65,7 @@ func TestPrometheusScaleUpDownCluster(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -86,6 +88,7 @@ func TestPrometheusVersionMigration(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
startVersion := prometheus.CompatibilityMatrix[0]
@ -114,6 +117,7 @@ func TestPrometheusResourceUpdate(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -162,6 +166,7 @@ func TestPrometheusReloadConfig(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
replicas := int32(1)
@ -201,7 +206,7 @@ scrape_configs:
},
}
svc := framework.MakeBasicPrometheusNodePortService(name, "reloadconfig-group", 30900)
svc := framework.MakePrometheusService(p.Name, "not-relevant", v1.ServiceTypeClusterIP)
if _, err := framework.KubeClient.CoreV1().Secrets(ns).Create(cfg); err != nil {
t.Fatal(err)
@ -217,7 +222,7 @@ scrape_configs:
ctx.AddFinalizerFn(finalizerFn)
}
if err := framework.WaitForTargets(1); err != nil {
if err := framework.WaitForTargets(ns, svc.Name, 1); err != nil {
t.Fatal(err)
}
@ -238,7 +243,7 @@ scrape_configs:
t.Fatal(err)
}
if err := framework.WaitForTargets(2); err != nil {
if err := framework.WaitForTargets(ns, svc.Name, 2); err != nil {
t.Fatal(err)
}
}
@ -249,6 +254,7 @@ func TestPrometheusReloadRules(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -269,7 +275,8 @@ func TestPrometheusReloadRules(t *testing.T) {
t.Fatal(err)
}
if err := framework.CreatePrometheusAndWaitUntilReady(ns, framework.MakeBasicPrometheus(ns, name, name, 1)); err != nil {
p := framework.MakeBasicPrometheus(ns, name, name, 1)
if err := framework.CreatePrometheusAndWaitUntilReady(ns, p); err != nil {
t.Fatal(err)
}
@ -301,10 +308,11 @@ func TestPrometheusDiscovery(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
prometheusName := "test"
group := "servicediscovery-test"
svc := framework.MakeBasicPrometheusNodePortService(prometheusName, group, 30900)
svc := framework.MakePrometheusService(prometheusName, group, v1.ServiceTypeClusterIP)
s := framework.MakeBasicServiceMonitor(group)
if _, err := framework.MonClient.ServiceMonitors(ns).Create(s); err != nil {
@ -328,7 +336,7 @@ func TestPrometheusDiscovery(t *testing.T) {
t.Fatal("Generated Secret could not be retrieved: ", err)
}
err = wait.Poll(time.Second, 18*time.Minute, isDiscoveryWorking(ns, prometheusName))
err = wait.Poll(time.Second, 18*time.Minute, isDiscoveryWorking(ns, svc.Name, prometheusName))
if err != nil {
t.Fatal(errors.Wrap(err, "validating Prometheus target discovery failed"))
}
@ -338,12 +346,13 @@ func TestPrometheusAlertmanagerDiscovery(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
prometheusName := "test"
alertmanagerName := "test"
group := "servicediscovery-test"
svc := framework.MakeBasicPrometheusNodePortService(prometheusName, group, 30900)
amsvc := framework.MakeAlertmanagerNodePortService(alertmanagerName, group, 30903)
svc := framework.MakePrometheusService(prometheusName, group, v1.ServiceTypeClusterIP)
amsvc := framework.MakeAlertmanagerService(alertmanagerName, group, v1.ServiceTypeClusterIP)
p := framework.MakeBasicPrometheus(ns, prometheusName, group, 1)
framework.AddAlertingToPrometheus(p, ns, alertmanagerName)
@ -376,44 +385,19 @@ func TestPrometheusAlertmanagerDiscovery(t *testing.T) {
t.Fatal(errors.Wrap(err, "creating Alertmanager service failed"))
}
err = wait.Poll(time.Second, 18*time.Minute, isAlertmanagerDiscoveryWorking(ns, alertmanagerName))
err = wait.Poll(time.Second, 18*time.Minute, isAlertmanagerDiscoveryWorking(ns, svc.Name, alertmanagerName))
if err != nil {
t.Fatal(errors.Wrap(err, "validating Prometheus Alertmanager discovery failed"))
}
}
func TestExposingPrometheusWithNodePort(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
basicPrometheus := framework.MakeBasicPrometheus(ns, "test", "test", 1)
service := framework.MakeBasicPrometheusNodePortService(basicPrometheus.Name, "nodeport-service", 30900)
if err := framework.CreatePrometheusAndWaitUntilReady(ns, basicPrometheus); err != nil {
t.Fatal("Creating prometheus failed: ", err)
}
if finalizerFn, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, service); err != nil {
t.Fatal("Creating prometheus service failed: ", err)
} else {
ctx.AddFinalizerFn(finalizerFn)
}
resp, err := http.Get(fmt.Sprintf("http://%s:30900/metrics", framework.ClusterIP))
if err != nil {
t.Fatal("Retrieving prometheus metrics failed with error: ", err)
} else if resp.StatusCode != 200 {
t.Fatal("Retrieving prometheus metrics failed with http status code: ", resp.StatusCode)
}
}
func TestExposingPrometheusWithKubernetesAPI(t *testing.T) {
t.Parallel()
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
basicPrometheus := framework.MakeBasicPrometheus(ns, "basic-prometheus", "test-group", 1)
service := framework.MakePrometheusService(basicPrometheus.Name, "test-group", v1.ServiceTypeClusterIP)
@ -434,47 +418,65 @@ func TestExposingPrometheusWithKubernetesAPI(t *testing.T) {
}
}
func TestExposingPrometheusWithIngress(t *testing.T) {
func TestPrometheusDiscoverTargetPort(t *testing.T) {
t.Parallel()
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
prometheus := framework.MakeBasicPrometheus(ns, "main", "test-group", 1)
prometheusService := framework.MakePrometheusService(prometheus.Name, "test-group", v1.ServiceTypeClusterIP)
ingress := testFramework.MakeBasicIngress(prometheusService.Name, 9090)
prometheusName := "test"
group := "servicediscovery-test"
svc := framework.MakePrometheusService(prometheusName, group, v1.ServiceTypeClusterIP)
err := testFramework.SetupNginxIngressControllerIncDefaultBackend(framework.KubeClient, ns)
if err != nil {
if _, err := framework.MonClient.ServiceMonitors(ns).Create(&v1alpha1.ServiceMonitor{
ObjectMeta: metav1.ObjectMeta{
Name: prometheusName,
Labels: map[string]string{
"group": group,
},
},
Spec: v1alpha1.ServiceMonitorSpec{
Selector: metav1.LabelSelector{
MatchLabels: map[string]string{
"group": group,
},
},
Endpoints: []v1alpha1.Endpoint{
v1alpha1.Endpoint{
TargetPort: intstr.FromInt(9090),
Interval: "30s",
},
},
},
}); err != nil {
t.Fatal("Creating ServiceMonitor failed: ", err)
}
p := framework.MakeBasicPrometheus(ns, prometheusName, group, 1)
if err := framework.CreatePrometheusAndWaitUntilReady(ns, p); err != nil {
t.Fatal(err)
}
err = framework.CreatePrometheusAndWaitUntilReady(ns, prometheus)
if err != nil {
t.Fatal(err)
if finalizerFn, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, svc); err != nil {
t.Fatal(errors.Wrap(err, "creating prometheus service failed"))
} else {
ctx.AddFinalizerFn(finalizerFn)
}
if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, prometheusService); err != nil {
t.Fatal(err)
_, err := framework.KubeClient.CoreV1().Secrets(ns).Get(fmt.Sprintf("prometheus-%s", prometheusName), metav1.GetOptions{})
if err != nil {
t.Fatal("Generated Secret could not be retrieved: ", err)
}
err = testFramework.CreateIngress(framework.KubeClient, ns, ingress)
err = wait.Poll(time.Second, 3*time.Minute, isDiscoveryWorking(ns, svc.Name, prometheusName))
if err != nil {
t.Fatal(err)
}
ip, err := testFramework.GetIngressIP(framework.KubeClient, ns, ingress.Name)
if err != nil {
t.Fatal(err)
}
err = testFramework.WaitForHTTPSuccessStatusCode(time.Minute, fmt.Sprintf("http://%s:/metrics", *ip))
if err != nil {
t.Fatal(err)
t.Fatal(errors.Wrap(err, "validating Prometheus target discovery failed"))
}
}
func isDiscoveryWorking(ns, prometheusName string) func() (bool, error) {
func isDiscoveryWorking(ns, svcName, prometheusName string) func() (bool, error) {
return func() (bool, error) {
pods, err := framework.KubeClient.CoreV1().Pods(ns).List(prometheus.ListOptions(prometheusName))
if err != nil {
@ -486,7 +488,7 @@ func isDiscoveryWorking(ns, prometheusName string) func() (bool, error) {
podIP := pods.Items[0].Status.PodIP
expectedTargets := []string{fmt.Sprintf("http://%s:9090/metrics", podIP)}
activeTargets, err := framework.GetActiveTargets()
activeTargets, err := framework.GetActiveTargets(ns, svcName)
if err != nil {
return false, err
}
@ -495,7 +497,7 @@ func isDiscoveryWorking(ns, prometheusName string) func() (bool, error) {
return false, nil
}
working, err := basicQueryWorking()
working, err := basicQueryWorking(ns, svcName)
if err != nil {
return false, err
}
@ -522,15 +524,14 @@ type prometheusQueryAPIResponse struct {
Data *queryResult `json:"data"`
}
func basicQueryWorking() (bool, error) {
resp, err := http.Get(fmt.Sprintf("http://%s:30900/api/v1/query?query=up", framework.ClusterIP))
func basicQueryWorking(ns, svcName string) (bool, error) {
response, err := framework.QueryPrometheusSVC(ns, svcName, "/api/v1/query", map[string]string{"query": "up"})
if err != nil {
return false, err
}
defer resp.Body.Close()
rq := prometheusQueryAPIResponse{}
if err := json.NewDecoder(resp.Body).Decode(&rq); err != nil {
if err := json.NewDecoder(response).Decode(&rq); err != nil {
return false, err
}
@ -542,7 +543,7 @@ func basicQueryWorking() (bool, error) {
return true, nil
}
func isAlertmanagerDiscoveryWorking(ns, alertmanagerName string) func() (bool, error) {
func isAlertmanagerDiscoveryWorking(ns, promSVCName, alertmanagerName string) func() (bool, error) {
return func() (bool, error) {
pods, err := framework.KubeClient.CoreV1().Pods(ns).List(alertmanager.ListOptions(alertmanagerName))
if err != nil {
@ -556,14 +557,13 @@ func isAlertmanagerDiscoveryWorking(ns, alertmanagerName string) func() (bool, e
expectedAlertmanagerTargets = append(expectedAlertmanagerTargets, fmt.Sprintf("http://%s:9093/api/v1/alerts", p.Status.PodIP))
}
resp, err := http.Get(fmt.Sprintf("http://%s:30900/api/v1/alertmanagers", framework.ClusterIP))
response, err := framework.QueryPrometheusSVC(ns, promSVCName, "/api/v1/alertmanagers", map[string]string{})
if err != nil {
return false, err
}
defer resp.Body.Close()
ra := prometheusAlertmanagerAPIResponse{}
if err := json.NewDecoder(resp.Body).Decode(&ra); err != nil {
if err := json.NewDecoder(response).Decode(&ra); err != nil {
return false, err
}

20
vendor/github.com/cenkalti/backoff/LICENSE generated vendored Normal file
View file

@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2014 Cenk Altı
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

30
vendor/github.com/cenkalti/backoff/README.md generated vendored Normal file
View file

@ -0,0 +1,30 @@
# Exponential Backoff [![GoDoc][godoc image]][godoc] [![Build Status][travis image]][travis] [![Coverage Status][coveralls image]][coveralls]
This is a Go port of the exponential backoff algorithm from [Google's HTTP Client Library for Java][google-http-java-client].
[Exponential backoff][exponential backoff wiki]
is an algorithm that uses feedback to multiplicatively decrease the rate of some process,
in order to gradually find an acceptable rate.
The retries exponentially increase and stop increasing when a certain threshold is met.
## Usage
See https://godoc.org/github.com/cenkalti/backoff#pkg-examples
## Contributing
* I would like to keep this library as small as possible.
* Please don't send a PR without opening an issue and discussing it first.
* If the proposed change is not a common use case, I will probably not accept it.
[godoc]: https://godoc.org/github.com/cenkalti/backoff
[godoc image]: https://godoc.org/github.com/cenkalti/backoff?status.png
[travis]: https://travis-ci.org/cenkalti/backoff
[travis image]: https://travis-ci.org/cenkalti/backoff.png?branch=master
[coveralls]: https://coveralls.io/github/cenkalti/backoff?branch=master
[coveralls image]: https://coveralls.io/repos/github/cenkalti/backoff/badge.svg?branch=master
[google-http-java-client]: https://github.com/google/google-http-java-client
[exponential backoff wiki]: http://en.wikipedia.org/wiki/Exponential_backoff
[advanced example]: https://godoc.org/github.com/cenkalti/backoff#example_

66
vendor/github.com/cenkalti/backoff/backoff.go generated vendored Normal file
View file

@ -0,0 +1,66 @@
// Package backoff implements backoff algorithms for retrying operations.
//
// Use Retry function for retrying operations that may fail.
// If Retry does not meet your needs,
// copy/paste the function into your project and modify as you wish.
//
// There is also Ticker type similar to time.Ticker.
// You can use it if you need to work with channels.
//
// See Examples section below for usage examples.
package backoff
import "time"
// BackOff is a backoff policy for retrying an operation.
type BackOff interface {
// NextBackOff returns the duration to wait before retrying the operation,
// or backoff.Stop to indicate that no more retries should be made.
//
// Example usage:
//
// duration := backoff.NextBackOff();
// if (duration == backoff.Stop) {
// // Do not retry operation.
// } else {
// // Sleep for duration and retry operation.
// }
//
NextBackOff() time.Duration
// Reset to initial state.
Reset()
}
// Stop indicates that no more retries should be made for use in NextBackOff().
const Stop time.Duration = -1
// ZeroBackOff is a fixed backoff policy whose backoff time is always zero,
// meaning that the operation is retried immediately without waiting, indefinitely.
type ZeroBackOff struct{}
func (b *ZeroBackOff) Reset() {}
func (b *ZeroBackOff) NextBackOff() time.Duration { return 0 }
// StopBackOff is a fixed backoff policy that always returns backoff.Stop for
// NextBackOff(), meaning that the operation should never be retried.
type StopBackOff struct{}
func (b *StopBackOff) Reset() {}
func (b *StopBackOff) NextBackOff() time.Duration { return Stop }
// ConstantBackOff is a backoff policy that always returns the same backoff delay.
// This is in contrast to an exponential backoff policy,
// which returns a delay that grows longer as you call NextBackOff() over and over again.
type ConstantBackOff struct {
Interval time.Duration
}
func (b *ConstantBackOff) Reset() {}
func (b *ConstantBackOff) NextBackOff() time.Duration { return b.Interval }
func NewConstantBackOff(d time.Duration) *ConstantBackOff {
return &ConstantBackOff{Interval: d}
}
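// Example sketch (illustrative only, not from the vendored source): driving an
// operation by hand against the BackOff interface, mirroring the pseudo-code in
// the interface documentation above. op and sleep are assumed placeholders.
func exampleManualRetry(b BackOff, op func() error, sleep func(time.Duration)) error {
	b.Reset()
	for {
		err := op()
		if err == nil {
			return nil
		}
		d := b.NextBackOff()
		if d == Stop {
			return err // the policy says to give up
		}
		sleep(d) // wait for the suggested duration, then try again
	}
}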

60
vendor/github.com/cenkalti/backoff/context.go generated vendored Normal file
View file

@ -0,0 +1,60 @@
package backoff
import (
"time"
"golang.org/x/net/context"
)
// BackOffContext is a backoff policy that stops retrying after the context
// is canceled.
type BackOffContext interface {
BackOff
Context() context.Context
}
type backOffContext struct {
BackOff
ctx context.Context
}
// WithContext returns a BackOffContext with context ctx
//
// ctx must not be nil
func WithContext(b BackOff, ctx context.Context) BackOffContext {
if ctx == nil {
panic("nil context")
}
if b, ok := b.(*backOffContext); ok {
return &backOffContext{
BackOff: b.BackOff,
ctx: ctx,
}
}
return &backOffContext{
BackOff: b,
ctx: ctx,
}
}
func ensureContext(b BackOff) BackOffContext {
if cb, ok := b.(BackOffContext); ok {
return cb
}
return WithContext(b, context.Background())
}
func (b *backOffContext) Context() context.Context {
return b.ctx
}
func (b *backOffContext) NextBackOff() time.Duration {
select {
case <-b.Context().Done():
return Stop
default:
return b.BackOff.NextBackOff()
}
}

151
vendor/github.com/cenkalti/backoff/exponential.go generated vendored Normal file
View file

@ -0,0 +1,151 @@
package backoff
import (
"math/rand"
"time"
)
/*
ExponentialBackOff is a backoff implementation that increases the backoff
period for each retry attempt using a randomization function that grows exponentially.
NextBackOff() is calculated using the following formula:
randomized interval =
RetryInterval * (random value in range [1 - RandomizationFactor, 1 + RandomizationFactor])
In other words NextBackOff() will range between the randomization factor
percentage below and above the retry interval.
For example, given the following parameters:
RetryInterval = 2
RandomizationFactor = 0.5
Multiplier = 2
the actual backoff period used in the next retry attempt will range between 1 and 3 seconds,
multiplied by the exponential, that is, between 2 and 6 seconds.
Note: MaxInterval caps the RetryInterval and not the randomized interval.
If the time elapsed since an ExponentialBackOff instance is created goes past the
MaxElapsedTime, then the method NextBackOff() starts returning backoff.Stop.
The elapsed time can be reset by calling Reset().
Example: Given the following default arguments, for 10 tries the sequence will be,
and assuming we go over the MaxElapsedTime on the 10th try:
 Request #  RetryInterval (seconds)  Randomized Interval (seconds)
  1          0.5                     [0.25,   0.75]
  2          0.75                    [0.375,  1.125]
  3          1.125                   [0.562,  1.687]
  4          1.687                   [0.8435, 2.53]
  5          2.53                    [1.265,  3.795]
  6          3.795                   [1.897,  5.692]
  7          5.692                   [2.846,  8.538]
  8          8.538                   [4.269, 12.807]
  9          12.807                  [6.403, 19.210]
 10          19.210                  backoff.Stop
Note: Implementation is not thread-safe.
*/
type ExponentialBackOff struct {
InitialInterval time.Duration
RandomizationFactor float64
Multiplier float64
MaxInterval time.Duration
// After MaxElapsedTime the ExponentialBackOff stops.
// It never stops if MaxElapsedTime == 0.
MaxElapsedTime time.Duration
Clock Clock
currentInterval time.Duration
startTime time.Time
}
// Clock is an interface that returns current time for BackOff.
type Clock interface {
Now() time.Time
}
// Default values for ExponentialBackOff.
const (
DefaultInitialInterval = 500 * time.Millisecond
DefaultRandomizationFactor = 0.5
DefaultMultiplier = 1.5
DefaultMaxInterval = 60 * time.Second
DefaultMaxElapsedTime = 15 * time.Minute
)
// NewExponentialBackOff creates an instance of ExponentialBackOff using default values.
func NewExponentialBackOff() *ExponentialBackOff {
b := &ExponentialBackOff{
InitialInterval: DefaultInitialInterval,
RandomizationFactor: DefaultRandomizationFactor,
Multiplier: DefaultMultiplier,
MaxInterval: DefaultMaxInterval,
MaxElapsedTime: DefaultMaxElapsedTime,
Clock: SystemClock,
}
b.Reset()
return b
}
type systemClock struct{}
func (t systemClock) Now() time.Time {
return time.Now()
}
// SystemClock implements Clock interface that uses time.Now().
var SystemClock = systemClock{}
// Reset the interval back to the initial retry interval and restarts the timer.
func (b *ExponentialBackOff) Reset() {
b.currentInterval = b.InitialInterval
b.startTime = b.Clock.Now()
}
// NextBackOff calculates the next backoff interval using the formula:
// Randomized interval = RetryInterval +/- (RandomizationFactor * RetryInterval)
func (b *ExponentialBackOff) NextBackOff() time.Duration {
// Make sure we have not gone over the maximum elapsed time.
if b.MaxElapsedTime != 0 && b.GetElapsedTime() > b.MaxElapsedTime {
return Stop
}
defer b.incrementCurrentInterval()
return getRandomValueFromInterval(b.RandomizationFactor, rand.Float64(), b.currentInterval)
}
// GetElapsedTime returns the elapsed time since an ExponentialBackOff instance
// is created and is reset when Reset() is called.
//
// The elapsed time is computed using time.Now().UnixNano().
func (b *ExponentialBackOff) GetElapsedTime() time.Duration {
return b.Clock.Now().Sub(b.startTime)
}
// Increments the current interval by multiplying it with the multiplier.
func (b *ExponentialBackOff) incrementCurrentInterval() {
// Check for overflow, if overflow is detected set the current interval to the max interval.
if float64(b.currentInterval) >= float64(b.MaxInterval)/b.Multiplier {
b.currentInterval = b.MaxInterval
} else {
b.currentInterval = time.Duration(float64(b.currentInterval) * b.Multiplier)
}
}
// Returns a random value from the following interval:
// [currentInterval - randomizationFactor*currentInterval, currentInterval + randomizationFactor*currentInterval].
func getRandomValueFromInterval(randomizationFactor, random float64, currentInterval time.Duration) time.Duration {
var delta = randomizationFactor * float64(currentInterval)
var minInterval = float64(currentInterval) - delta
var maxInterval = float64(currentInterval) + delta
// Get a random value from the range [minInterval, maxInterval].
// The formula used below has a +1 because if the minInterval is 1 and the maxInterval is 3 then
// we want a 33% chance for selecting either 1, 2 or 3.
return time.Duration(minInterval + (random * (maxInterval - minInterval + 1)))
}
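// Worked sketch (illustrative only): with the defaults above
// (InitialInterval = 500ms, RandomizationFactor = 0.5, Multiplier = 1.5),
// the first NextBackOff() lands in [250ms, 750ms], the second in
// [375ms, 1.125s], matching the table in the ExponentialBackOff docs.
func exampleDefaultIntervals() []time.Duration {
	b := NewExponentialBackOff()
	out := make([]time.Duration, 0, 3)
	for i := 0; i < 3; i++ {
		out = append(out, b.NextBackOff()) // randomized, grows by ~1.5x per call
	}
	return out
}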

78
vendor/github.com/cenkalti/backoff/retry.go generated vendored Normal file
View file

@ -0,0 +1,78 @@
package backoff
import "time"
// An Operation is executed by Retry() or RetryNotify().
// The operation will be retried using a backoff policy if it returns an error.
type Operation func() error
// Notify is a notify-on-error function. It receives an operation error and
// backoff delay if the operation failed (with an error).
//
// NOTE that if the backoff policy signals to stop retrying,
// the notify function isn't called.
type Notify func(error, time.Duration)
// Retry the operation o until it does not return error or BackOff stops.
// o is guaranteed to be run at least once.
// It is the caller's responsibility to reset b after Retry returns.
//
// If o returns a *PermanentError, the operation is not retried, and the
// wrapped error is returned.
//
// Retry sleeps the goroutine for the duration returned by BackOff after a
// failed operation returns.
func Retry(o Operation, b BackOff) error { return RetryNotify(o, b, nil) }
// RetryNotify calls notify function with the error and wait duration
// for each failed attempt before sleep.
func RetryNotify(operation Operation, b BackOff, notify Notify) error {
var err error
var next time.Duration
cb := ensureContext(b)
b.Reset()
for {
if err = operation(); err == nil {
return nil
}
if permanent, ok := err.(*PermanentError); ok {
return permanent.Err
}
if next = b.NextBackOff(); next == Stop {
return err
}
if notify != nil {
notify(err, next)
}
t := time.NewTimer(next)
select {
case <-cb.Context().Done():
t.Stop()
return err
case <-t.C:
}
}
}
// PermanentError signals that the operation should not be retried.
type PermanentError struct {
Err error
}
func (e *PermanentError) Error() string {
return e.Err.Error()
}
// Permanent wraps the given err in a *PermanentError.
func Permanent(err error) *PermanentError {
return &PermanentError{
Err: err,
}
}
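// Usage sketch (illustrative only): retrying a flaky operation with the default
// exponential policy and observing each failed attempt via Notify. fetch is an
// assumed placeholder for the operation being retried.
func exampleRetryNotify(fetch func() error) error {
	notify := func(err error, wait time.Duration) {
		// a real caller might log: "retrying in <wait> after error: <err>"
	}
	return RetryNotify(fetch, NewExponentialBackOff(), notify)
}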

81
vendor/github.com/cenkalti/backoff/ticker.go generated vendored Normal file
View file

@ -0,0 +1,81 @@
package backoff
import (
"runtime"
"sync"
"time"
)
// Ticker holds a channel that delivers `ticks' of a clock at times reported by a BackOff.
//
// Ticks will continue to arrive when the previous operation is still running,
// so operations that take a while to fail could run in quick succession.
type Ticker struct {
C <-chan time.Time
c chan time.Time
b BackOffContext
stop chan struct{}
stopOnce sync.Once
}
// NewTicker returns a new Ticker containing a channel that will send the time at times
// specified by the BackOff argument. Ticker is guaranteed to tick at least once.
// The channel is closed when Stop method is called or BackOff stops.
func NewTicker(b BackOff) *Ticker {
c := make(chan time.Time)
t := &Ticker{
C: c,
c: c,
b: ensureContext(b),
stop: make(chan struct{}),
}
go t.run()
runtime.SetFinalizer(t, (*Ticker).Stop)
return t
}
// Stop turns off a ticker. After Stop, no more ticks will be sent.
func (t *Ticker) Stop() {
t.stopOnce.Do(func() { close(t.stop) })
}
func (t *Ticker) run() {
c := t.c
defer close(c)
t.b.Reset()
// Ticker is guaranteed to tick at least once.
afterC := t.send(time.Now())
for {
if afterC == nil {
return
}
select {
case tick := <-afterC:
afterC = t.send(tick)
case <-t.stop:
t.c = nil // Prevent future ticks from being sent to the channel.
return
case <-t.b.Context().Done():
return
}
}
}
func (t *Ticker) send(tick time.Time) <-chan time.Time {
select {
case t.c <- tick:
case <-t.stop:
return nil
}
next := t.b.NextBackOff()
if next == Stop {
t.Stop()
return nil
}
return time.After(next)
}
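// Usage sketch (illustrative only): consuming the ticker from a channel-based
// retry loop instead of calling Retry(). op is an assumed placeholder.
func exampleTickerRetry(op func() error) error {
	ticker := NewTicker(NewExponentialBackOff())
	defer ticker.Stop()

	var err error
	for range ticker.C {
		if err = op(); err == nil {
			return nil
		}
		// when the policy returns Stop, the channel is closed and the loop ends
	}
	return err
}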

6
vendor/vendor.json vendored
View file

@ -20,6 +20,12 @@
"revision": "4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9",
"revisionTime": "2016-08-04T10:47:26Z"
},
{
"checksumSHA1": "NfQBkfSVHEmnR2OaORxdOXmhLcs=",
"path": "github.com/cenkalti/backoff",
"revision": "5d150e7eec023ce7a124856b37c68e54b4050ac7",
"revisionTime": "2017-03-29T03:22:34Z"
},
{
"checksumSHA1": "jyYz6OqzmWw6CAfSc8WwjDB1S3k=",
"path": "github.com/emicklei/go-restful",