
Merge remote-tracking branch 'upstream/master' into crd

Goutham Veeramachaneni 2017-07-21 16:37:55 +05:30
commit c3f957d2ab
No known key found for this signature in database
GPG key ID: F1C217E8E9023CAD
80 changed files with 1281 additions and 476 deletions
CHANGELOG.md
Documentation
Jenkinsfile
Makefile
VERSION
bundle.yaml
cmd
apidocgen
operator
contrib
example
non-rbac
rbac/prometheus-operator
helm
pkg
client/monitoring/v1alpha1
k8sutil
prometheus
scripts/jenkins
test/e2e
vendor


@ -1,3 +1,15 @@
## 0.11.0 / 2017-07-20
Warning: This release deprecates the previously used storage definition in favor of upstream PersistentVolumeClaim templates. While this should not have an immediate effect on a running cluster, Prometheus object definitions that have storage configured need to be adapted. The previously existing fields are still there, but have no effect anymore.
* [FEATURE] Add Prometheus 2.0 alpha3 support.
* [FEATURE] Use PVC templates instead of custom storage definition.
* [FEATURE] Add cAdvisor port to kubelet sync.
* [FEATURE] Allow default base images to be configurable.
* [FEATURE] Configure Prometheus to only use necessary namespaces.
* [ENHANCEMENT] Improve rollout detection for Alertmanager clusters.
* [BUGFIX] Fix targetPort relabeling.
## 0.10.2 / 2017-06-21
* [BUGFIX] Use computed route prefix instead of directly from manifest.
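
To make the storage deprecation warning above concrete, here is a minimal sketch of a Prometheus object using the new PVC-template form. The resource name, storage class, and size are illustrative assumptions, not values taken from this commit; per the operator code in this change, the access mode is forced to ReadWriteOnce.

apiVersion: monitoring.coreos.com/v1alpha1
kind: Prometheus
metadata:
  name: example                     # hypothetical name
spec:
  replicas: 2
  storage:
    # The full PersistentVolumeClaim template replaces the deprecated
    # class/selector/resources fields, which remain but have no effect.
    volumeClaimTemplate:
      spec:
        storageClassName: standard  # assumed storage class
        resources:
          requests:
            storage: 10Gi           # assumed size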


@ -6,7 +6,7 @@ This Document documents the types introduced by the Prometheus Operator to be co
## AlertingSpec
AlertingSpec defines paramters for alerting configuration of Prometheus servers.
AlertingSpec defines parameters for alerting configuration of Prometheus servers.
| Field | Description | Scheme | Required |
| ----- | ----------- | ------ | -------- |
@ -199,7 +199,7 @@ StorageSpec defines the configured storage for a group Prometheus servers.
| class | Name of the StorageClass to use when requesting storage provisioning. More info: https://kubernetes.io/docs/user-guide/persistent-volumes/#storageclasses DEPRECATED | string | true |
| selector | A label query over volumes to consider for binding. DEPRECATED | *[metav1.LabelSelector](https://kubernetes.io/docs/api-reference/v1.6/#labelselector-v1-meta) | true |
| resources | Resources represents the minimum resources the volume should have. More info: http://kubernetes.io/docs/user-guide/persistent-volumes#resources DEPRECATED | [v1.ResourceRequirements](https://kubernetes.io/docs/api-reference/v1.6/#resourcerequirements-v1-core) | true |
| volumeClaimTemplate | Pvc A pvc spec to be used by the Prometheus statefulsets. | v1.PersistentVolumeClaim | false |
| volumeClaimTemplate | A PVC spec to be used by the Prometheus StatefulSets. | [v1.PersistentVolumeClaim](https://kubernetes.io/docs/api-reference/v1.6/#persistentvolumeclaim-v1-core) | false |
## TLSConfig


@ -43,7 +43,7 @@ spec:
serviceAccountName: prometheus-operator
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
args:
- "--kubelet-service=kube-system/kubelet"
- "--config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1"
@ -326,6 +326,8 @@ spec:
endpoints:
- port: http-metrics
interval: 30s
- port: cadvisor
interval: 30s
honorLabels: true
selector:
matchLabels:


@ -85,7 +85,7 @@ spec:
serviceAccountName: prometheus-operator
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
resources:
requests:
cpu: 100m

Jenkinsfile (vendored): 12 changed lines

@ -66,6 +66,12 @@ job('po-tests-pr') {
onlyIfBuildSucceeds(false)
onlyIfBuildFails(false)
}
postBuildScripts {
archiveArtifacts('build/**/*')
onlyIfBuildSucceeds(false)
onlyIfBuildFails(false)
}
wsCleanup()
}
}
@ -125,6 +131,11 @@ job('po-tests-master') {
onlyIfBuildSucceeds(false)
onlyIfBuildFails(false)
}
postBuildScripts {
archiveArtifacts('build/**/*')
onlyIfBuildSucceeds(false)
onlyIfBuildFails(false)
}
slackNotifier {
room('#team-monitoring')
teamDomain('coreos')
@ -133,5 +144,6 @@ job('po-tests-master') {
notifyRegression(true)
notifyRepeatedFailure(true)
}
wsCleanup()
}
}


@ -1,12 +1,11 @@
REPO?=quay.io/coreos/prometheus-operator
TAG?=$(shell git rev-parse --short HEAD)
NAMESPACE?=po-e2e-$(shell LC_CTYPE=C tr -dc a-z0-9 < /dev/urandom | head -c 13 ; echo '')
KUBECONFIG?=$(HOME)/.kube/config
PROMU := $(GOPATH)/bin/promu
PREFIX ?= $(shell pwd)
CLUSTER_IP?=$(shell kubectl config view --minify | grep server: | cut -f 3 -d ":" | tr -d "//")
pkgs = $(shell go list ./... | grep -v /vendor/ | grep -v /test/)
all: check-license format build test
@ -31,7 +30,7 @@ container:
e2e-test:
go test -timeout 20m -v ./test/migration/ $(TEST_RUN_ARGS) --kubeconfig "$(HOME)/.kube/config" --operator-image=$(REPO):$(TAG) --namespace=$(NAMESPACE) --cluster-ip=$(CLUSTER_IP)
go test -timeout 20m -v ./test/e2e/ $(TEST_RUN_ARGS) --kubeconfig "$(HOME)/.kube/config" --operator-image=$(REPO):$(TAG) --namespace=$(NAMESPACE) --cluster-ip=$(CLUSTER_IP)
go test -timeout 20m -v ./test/e2e/ $(TEST_RUN_ARGS) --kubeconfig=$(KUBECONFIG) --operator-image=$(REPO):$(TAG) --namespace=$(NAMESPACE)
e2e-status:
kubectl get prometheus,alertmanager,servicemonitor,statefulsets,deploy,svc,endpoints,pods,cm,secrets,replicationcontrollers --all-namespaces
@ -54,8 +53,8 @@ apidocgen:
@go install github.com/coreos/prometheus-operator/cmd/apidocgen
docs: embedmd apidocgen
embedmd -w `find Documentation -name "*.md"`
apidocgen pkg/client/monitoring/v1alpha1/types.go > Documentation/api.md
$(GOPATH)/bin/embedmd -w `find Documentation -name "*.md"`
$(GOPATH)/bin/apidocgen pkg/client/monitoring/v1alpha1/types.go > Documentation/api.md
generate:
hack/generate.sh


@ -1,2 +1,2 @@
0.10.1
0.11.0


@ -75,7 +75,7 @@ spec:
serviceAccountName: prometheus-operator
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
resources:
requests:
cpu: 100m


@ -42,6 +42,7 @@ var (
"v1.ResourceRequirements": "https://kubernetes.io/docs/api-reference/v1.6/#resourcerequirements-v1-core",
"v1.LocalObjectReference": "https://kubernetes.io/docs/api-reference/v1.6/#localobjectreference-v1-core",
"v1.SecretKeySelector": "https://kubernetes.io/docs/api-reference/v1.6/#secretkeyselector-v1-core",
"v1.PersistentVolumeClaim": "https://kubernetes.io/docs/api-reference/v1.6/#persistentvolumeclaim-v1-core",
}
selfLinks = map[string]string{}


@ -52,7 +52,7 @@ func init() {
flagset.StringVar(&cfg.KubeletObject, "kubelet-service", "", "Service/Endpoints object to write kubelets into in format \"namespace/name\"")
flagset.BoolVar(&cfg.TLSInsecure, "tls-insecure", false, "- NOT RECOMMENDED FOR PRODUCTION - Don't verify API server's CA certificate.")
flagset.BoolVar(&analyticsEnabled, "analytics", true, "Send analytical event (Cluster Created/Deleted etc.) to Google Analytics")
flagset.StringVar(&cfg.PrometheusConfigReloader, "prometheus-config-reloader", "quay.io/coreos/prometheus-config-reloader:v0.0.1", "Config and rule reload image")
flagset.StringVar(&cfg.PrometheusConfigReloader, "prometheus-config-reloader", "quay.io/coreos/prometheus-config-reloader:v0.0.2", "Config and rule reload image")
flagset.StringVar(&cfg.ConfigReloaderImage, "config-reloader-image", "quay.io/coreos/configmap-reload:v0.0.1", "Reload Image")
flagset.StringVar(&cfg.AlertmanagerDefaultBaseImage, "alertmanager-default-base-image", "quay.io/prometheus/alertmanager", "Alertmanager default base image")
flagset.StringVar(&cfg.PrometheusDefaultBaseImage, "prometheus-default-base-image", "quay.io/prometheus/prometheus", "Prometheus default base image")


@ -3,7 +3,7 @@ all: build
FLAGS =
ENVVAR = GOOS=linux GOARCH=amd64 CGO_ENABLED=0
REGISTRY = quay.io/coreos
TAG = v0.0.4
TAG = v0.0.5
NAME = grafana-watcher
build:


@ -1,8 +1,6 @@
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/scrape: 'true'
labels:
app: grafana
name: grafana
@ -75,8 +73,7 @@ spec:
memory: 200Mi
cpu: 200m
- name: grafana-watcher
image: quay.io/coreos/grafana-watcher:v0.0.4
imagePullPolicy: Never
image: quay.io/coreos/grafana-watcher:v0.0.5
args:
- '--watch-dir=/var/grafana-dashboards'
- '--grafana-url=http://localhost:3000'
@ -110,4 +107,3 @@ spec:
- name: grafana-dashboards
configMap:
name: grafana-dashboards


@ -82,7 +82,7 @@ func (u *GrafanaDatasourceUpdater) createDatasourcesFromFiles() error {
}
for _, fp := range filePaths {
u.createDatasourceFromFile(fp)
err = u.createDatasourceFromFile(fp)
if err != nil {
return err
}


@ -1,14 +1,14 @@
### Up Alerting ###
Alert TargetDown
IF 100 * (count(up == 0) / count(up)) > 3
IF 100 * (count by(job) (up == 0) / count by(job) (up)) > 10
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Targets are down",
description = "More than {{ $value }}% of targets are down."
description = "{{ $value }}% or more of {{ $labels.job }} targets are down."
}
### Dead man's switch ###


@ -1,5 +1,5 @@
ALERT K8SApiserverDown
IF absent({job="apiserver"}) or (count by(cluster) (up{job="apiserver"} == 1) < count by(cluster) (up{job="apiserver"}))
IF absent(up{job="apiserver"} == 1)
FOR 5m
LABELS {
severity = "critical"


@ -1,5 +1,5 @@
ALERT K8SControllerManagerDown
IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0)
IF absent(up{job="kube-controller-manager"} == 1)
FOR 5m
LABELS {
severity = "critical",
@ -7,4 +7,5 @@ ALERT K8SControllerManagerDown
ANNOTATIONS {
summary = "Controller manager is down",
description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.",
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-controller-manager",
}


@ -1,5 +1,5 @@
ALERT K8SSchedulerDown
IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0)
IF absent(up{job="kube-scheduler"} == 1)
FOR 5m
LABELS {
severity = "critical",
@ -7,4 +7,5 @@ ALERT K8SSchedulerDown
ANNOTATIONS {
summary = "Scheduler is down",
description = "There is no running K8S scheduler. New pods are not being assigned to nodes.",
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-scheduler",
}


@ -11,24 +11,24 @@ ALERT K8SNodeNotReady
ALERT K8SManyNodesNotReady
IF
count by (cluster) (kube_node_status_ready{condition="true"} == 0) > 1
count(kube_node_status_ready{condition="true"} == 0) > 1
AND
(
count by (cluster) (kube_node_status_ready{condition="true"} == 0)
count(kube_node_status_ready{condition="true"} == 0)
/
count by (cluster) (kube_node_status_ready{condition="true"})
count(kube_node_status_ready{condition="true"})
) > 0.2
FOR 1m
LABELS {
severity = "critical",
}
ANNOTATIONS {
summary = "Many K8s nodes are Not Ready",
description = "{{ $value }} K8s nodes (more than 10% of cluster {{ $labels.cluster }}) are in the NotReady state.",
summary = "Many Kubernetes nodes are Not Ready",
description = "{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).",
}
ALERT K8SKubeletDown
IF count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.03
IF count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.03
FOR 1h
LABELS {
severity = "warning",
@ -39,7 +39,7 @@ ALERT K8SKubeletDown
}
ALERT K8SKubeletDown
IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
IF absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.1
FOR 1h
LABELS {
severity = "critical",


@ -1,10 +1,10 @@
ALERT NodeExporterDown
IF up{job="node-exporter"} == 0
IF absent(up{job="node-exporter"} == 1)
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "node-exporter cannot be scraped",
description = "Prometheus could not scrape a node-exporter for more than 10m.",
description = "Prometheus could not scrape a node-exporter for more than 10m, or node-exporters have disappeared from discovery.",
}


@ -41,7 +41,7 @@ spec:
memory: 200Mi
cpu: 200m
- name: grafana-watcher
image: quay.io/coreos/grafana-watcher:v0.0.4
image: quay.io/coreos/grafana-watcher:v0.0.5
args:
- '--watch-dir=/var/grafana-dashboards'
- '--grafana-url=http://localhost:3000'
@ -56,9 +56,6 @@ spec:
secretKeyRef:
name: grafana-credentials
key: password
volumeMounts:
- name: grafana-dashboards
mountPath: /var/grafana-dashboards
resources:
requests:
memory: "16Mi"


@ -7,7 +7,10 @@ rules:
resources:
- nodes
- pods
- services
- resourcequotas
- replicationcontrollers
- limitranges
verbs: ["list", "watch"]
- apiGroups: ["extensions"]
resources:


@ -14,7 +14,7 @@ spec:
serviceAccountName: prometheus-operator
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
args:
- "--kubelet-service=kube-system/kubelet"
- "--config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1"


@ -225,14 +225,14 @@ data:
### Up Alerting ###
Alert TargetDown
IF 100 * (count(up == 0) / count(up)) > 3
IF 100 * (count by(job) (up == 0) / count by(job) (up)) > 10
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Targets are down",
description = "More than {{ $value }}% of targets are down."
description = "{{ $value }}% or more of {{ $labels.job }} targets are down."
}
### Dead man's switch ###
@ -287,7 +287,7 @@ data:
}
kube-apiserver.rules: |+
ALERT K8SApiserverDown
IF absent({job="apiserver"}) or (count by(cluster) (up{job="apiserver"} == 1) < count by(cluster) (up{job="apiserver"}))
IF absent(up{job="apiserver"} == 1)
FOR 5m
LABELS {
severity = "critical"
@ -316,7 +316,7 @@ data:
}
kube-controller-manager.rules: |+
ALERT K8SControllerManagerDown
IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0)
IF absent(up{job="kube-controller-manager"} == 1)
FOR 5m
LABELS {
severity = "critical",
@ -324,6 +324,7 @@ data:
ANNOTATIONS {
summary = "Controller manager is down",
description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.",
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-controller-manager",
}
kubelet.rules: |+
ALERT K8SNodeNotReady
@ -339,24 +340,24 @@ data:
ALERT K8SManyNodesNotReady
IF
count by (cluster) (kube_node_status_ready{condition="true"} == 0) > 1
count(kube_node_status_ready{condition="true"} == 0) > 1
AND
(
count by (cluster) (kube_node_status_ready{condition="true"} == 0)
count(kube_node_status_ready{condition="true"} == 0)
/
count by (cluster) (kube_node_status_ready{condition="true"})
count(kube_node_status_ready{condition="true"})
) > 0.2
FOR 1m
LABELS {
severity = "critical",
}
ANNOTATIONS {
summary = "Many K8s nodes are Not Ready",
description = "{{ $value }} K8s nodes (more than 10% of cluster {{ $labels.cluster }}) are in the NotReady state.",
summary = "Many Kubernetes nodes are Not Ready",
description = "{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).",
}
ALERT K8SKubeletDown
IF count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.03
IF count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.03
FOR 1h
LABELS {
severity = "warning",
@ -367,7 +368,7 @@ data:
}
ALERT K8SKubeletDown
IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
IF absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.1
FOR 1h
LABELS {
severity = "critical",
@ -560,7 +561,7 @@ data:
histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
kube-scheduler.rules: |+
ALERT K8SSchedulerDown
IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0)
IF absent(up{job="kube-scheduler"} == 1)
FOR 5m
LABELS {
severity = "critical",
@ -568,17 +569,18 @@ data:
ANNOTATIONS {
summary = "Scheduler is down",
description = "There is no running K8S scheduler. New pods are not being assigned to nodes.",
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-scheduler",
}
node.rules: |+
ALERT NodeExporterDown
IF up{job="node-exporter"} == 0
IF absent(up{job="node-exporter"} == 1)
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "node-exporter cannot be scraped",
description = "Prometheus could not scrape a node-exporter for more than 10m.",
description = "Prometheus could not scrape a node-exporter for more than 10m, or node-exporters have disappeared from discovery.",
}
prometheus.rules: |+
ALERT FailedReload


@ -9,6 +9,8 @@ spec:
endpoints:
- port: http-metrics
interval: 30s
- port: cadvisor
interval: 30s
honorLabels: true
selector:
matchLabels:


@ -4,7 +4,7 @@ FLAGS =
ENVVAR = GOOS=linux GOARCH=amd64 CGO_ENABLED=0
NAME = prometheus-config-reloader
REPO = quay.io/coreos/$(NAME)
TAG = v0.0.1
TAG = v0.0.2
IMAGE = $(REPO):$(TAG)
build:


@ -30,7 +30,7 @@ spec:
mountPath: /etc/prometheus/rules
readOnly: true
- name: prometheus-config-reloader
image: quay.io/coreos/prometheus-config-reloader:v0.0.1
image: quay.io/coreos/prometheus-config-reloader:v0.0.2
args:
- '-config-volume-dir=/etc/prometheus/config'
- '-rule-volume-dir=/etc/prometheus/rules'


@ -24,9 +24,11 @@ import (
"os"
"path/filepath"
"strings"
"time"
fsnotify "gopkg.in/fsnotify.v1"
"github.com/cenkalti/backoff"
"github.com/ericchiang/k8s"
"github.com/go-kit/kit/log"
)
@ -208,7 +210,9 @@ func (w *volumeWatcher) Refresh() {
}
w.logger.Log("msg", "Reloading Prometheus...")
err = w.ReloadPrometheus()
err = backoff.RetryNotify(w.ReloadPrometheus, backoff.NewExponentialBackOff(), func(err error, next time.Duration) {
w.logger.Log("msg", "Reloading Prometheus temporarily failed.", "err", err, "next-retry", next)
})
if err != nil {
w.logger.Log("msg", "Reloading Prometheus failed.", "err", err)
} else {


@ -13,7 +13,7 @@ spec:
spec:
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
resources:
requests:
cpu: 100m


@ -14,7 +14,7 @@ spec:
serviceAccountName: prometheus-operator
containers:
- name: prometheus-operator
image: quay.io/coreos/prometheus-operator:v0.10.1
image: quay.io/coreos/prometheus-operator:v0.11.0
resources:
requests:
cpu: 100m


@ -7,4 +7,4 @@ maintainers:
name: alertmanager
sources:
- https://github.com/coreos/prometheus-operator
version: 0.0.1
version: 0.0.3


@ -76,5 +76,6 @@ $ helm install opsgoodness/alertmanager --name my-release -f values.yaml
> **Tip**: You can use the default [values.yaml](values.yaml)
### Third-party Resource Documentation
- [alertmanager](https://github.com/coreos/prometheus-operator/blob/master/Documentation/alertmanager.md)
- [prometheus](https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md)
- [Alertmanager](/Documentation/design.md#alertmanager)
- [Prometheus](/Documentation/design.md#prometheus)
- [ServiceMonitor](/Documentation/design.md#servicemonitor)


@ -17,14 +17,14 @@ spec:
{{- else }}
externalUrl: http://{{ template "fullname" . }}.{{ .Release.Namespace }}:9093
{{- end }}
# {{- if .Values.nodeSelector }}
# nodeSelector:
# {{ toYaml .Values.nodeSelector | indent 4 }}
# {{- end }}
{{- if .Values.nodeSelector }}
nodeSelector:
{{ toYaml .Values.nodeSelector | indent 4 }}
{{- end }}
paused: {{ .Values.paused }}
replicas: {{ .Values.replicaCount }}
# resources:
# {{ toYaml .Values.resources | indent 4 }}
resources:
{{ toYaml .Values.resources | indent 4 }}
{{- if .Values.storageSpec }}
storage:
{{ toYaml .Values.storageSpec | indent 4 }}


@ -25,7 +25,7 @@ externalUrl: ""
##
image:
repository: quay.io/prometheus/alertmanager
tag: v0.5.1
tag: v0.7.1
ingress:
## If true, Alertmanager Ingress will be created
@ -100,8 +100,12 @@ service:
##
type: ClusterIP
## If true, create & use RBAC resources
##
rbacEnable: true
## Alertmanager StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
# class: default


@ -102,7 +102,7 @@ alertmanager:
type: ClusterIP
## Alertmanager StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
# class: default
@ -113,7 +113,7 @@ alertmanager:
prometheus:
## Alertmanagers to which alerts will be sent
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#alertmanagerendpoints
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerendpoints
##
alertingEndpoints: []
# - name: ""
@ -319,7 +319,7 @@ prometheus:
# serverName: ""
## Prometheus StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
# class: default


@ -7,4 +7,4 @@ maintainers:
name: prometheus-operator
sources:
- https://github.com/coreos/prometheus-operator
version: 0.0.5
version: 0.0.6


@ -90,7 +90,7 @@ Parameter | Description | Default
`kubeletService.name` | The name of the kubelet service to be created | `kubelet`
`nodeSelector` | Node labels for pod assignment | `{}`
`prometheusConfigReloader.repository` | prometheus-config-reloader image | `quay.io/coreos/prometheus-config-reloader`
`prometheusConfigReloader.tag` | prometheus-config-reloader tag | `v0.0.1`
`prometheusConfigReloader.tag` | prometheus-config-reloader tag | `v0.0.2`
`rbacEnable` | If true, create & use RBAC resources | `true`
`resources` | Pod resource requests & limits | `{}`
`sendAnalytics` | Collect & send anonymous usage statistics | `true`
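
As a usage sketch for the parameter table above (not part of this diff), the config reloader image could be pinned in a custom values file and passed to helm install with -f; the file name is hypothetical, and the values mirror the chart defaults shown in this commit.

# my-values.yaml (hypothetical override for the prometheus-operator chart)
prometheusConfigReloader:
  repository: quay.io/coreos/prometheus-config-reloader
  tag: v0.0.2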


@ -10,7 +10,7 @@ global:
##
prometheusConfigReloader:
repository: quay.io/coreos/prometheus-config-reloader
tag: v0.0.1
tag: v0.0.2
## Configmap-reload image to use for reloading configmaps
##
@ -22,7 +22,7 @@ configmapReload:
##
image:
repository: quay.io/coreos/prometheus-operator
tag: v0.9.1
tag: v0.10.1
pullPolicy: IfNotPresent
## If enabled, prometheus-operator will create a service for scraping kubelets


@ -7,4 +7,4 @@ maintainers:
name: prometheus
sources:
- https://github.com/coreos/prometheus-operator
version: 0.0.1
version: 0.0.3


@ -44,6 +44,7 @@ Parameter | Description | Default
--- | --- | ---
`alertingEndpoints` | Alertmanagers to which alerts will be sent | `[]`
`config` | Prometheus configuration directives | `{}`
`externalLabels` | The labels to add to any time series or alerts when communicating with external systems | `{}`
`externalUrl` | External URL at which Prometheus will be reachable | `""`
`image.repository` | Image | `quay.io/prometheus/prometheus`
`image.tag` | Image tag | `v1.5.2`
@ -81,5 +82,6 @@ $ helm install opsgoodness/prometheus --name my-release -f values.yaml
> **Tip**: You can use the default [values.yaml](values.yaml)
### Third-party Resource Documentation
- [prometheus](https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md)
- [servicemonitor](https://github.com/coreos/prometheus-operator/blob/master/Documentation/service-monitor.md)
- [Alertmanager](/Documentation/design.md#alertmanager)
- [Prometheus](/Documentation/design.md#prometheus)
- [ServiceMonitor](/Documentation/design.md#servicemonitor)
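
The new externalLabels parameter listed in the table above maps directly onto the chart's values file. Below is a minimal override sketch with made-up label keys and values.

# values override sketch for the prometheus chart; labels are illustrative only
externalLabels:
  cluster: dev-cluster    # hypothetical label
  environment: staging    # hypothetical label
# rbacEnable is also new in this chart and switches on the ClusterRole,
# ClusterRoleBinding and ServiceAccount templates added by this commit.
rbacEnable: true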


@ -0,0 +1,29 @@
{{- if .Values.rbacEnable }}
{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1beta1" }}
apiVersion: rbac.authorization.k8s.io/v1beta1
{{- else if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1alpha1" }}
apiVersion: rbac.authorization.k8s.io/v1alpha1
{{- end }}
kind: ClusterRole
metadata:
labels:
app: {{ template "name" . }}
chart: {{ .Chart.Name }}-{{ .Chart.Version }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "fullname" . }}
rules:
- apiGroups: [""]
resources:
- nodes
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources:
- configmaps
verbs: ["get"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
{{- end }}


@ -0,0 +1,24 @@
{{- if .Values.rbacEnable }}
{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1beta1" }}
apiVersion: rbac.authorization.k8s.io/v1beta1
{{- else if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1alpha1" }}
apiVersion: rbac.authorization.k8s.io/v1alpha1
{{- end }}
kind: ClusterRoleBinding
metadata:
labels:
app: {{ template "name" . }}
chart: {{ .Chart.Name }}-{{ .Chart.Version }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "fullname" . }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: {{ template "fullname" . }}
subjects:
- kind: ServiceAccount
name: {{ template "fullname" . }}
namespace: {{ .Release.Namespace }}
{{- end }}


@ -21,6 +21,10 @@ spec:
port: http
{{- end }}
baseImage: "{{ .Values.image.repository }}"
{{- if .Values.externalLabels }}
externalLabels:
{{ toYaml .Values.externalLabels | indent 4}}
{{- end }}
{{- if .Values.externalUrl }}
externalUrl: "{{ .Values.externalUrl }}"
{{- else if .Values.ingress.fqdn }}
@ -40,6 +44,9 @@ spec:
{{- if .Values.routePrefix }}
routePrefix: "{{ .Values.routePrefix }}"
{{- end }}
{{- if .Values.rbacEnable }}
serviceAccountName: {{ template "fullname" . }}
{{- end }}
{{- if .Values.serviceMonitorsSelector }}
serviceMonitorSelector:
matchLabels:


@ -0,0 +1,11 @@
{{- if .Values.rbacEnable }}
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app: {{ template "name" . }}
chart: {{ .Chart.Name }}-{{ .Chart.Version }}
heritage: {{ .Release.Service }}
release: {{ .Release.Name }}
name: {{ template "fullname" . }}
{{- end }}


@ -1,5 +1,5 @@
## Alertmanagers to which alerts will be sent
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#alertmanagerendpoints
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerendpoints
##
alertingEndpoints: []
# - name: ""
@ -15,6 +15,10 @@ config:
specifiedInValues: true
value: {}
## External labels to add to any time series or alerts when communicating with external systems
##
externalLabels: {}
## External URL at which Prometheus will be reachable
##
externalUrl: ""
@ -23,7 +27,7 @@ externalUrl: ""
##
image:
repository: quay.io/prometheus/prometheus
tag: v1.5.2
tag: v1.7.1
ingress:
## If true, Prometheus Ingress will be created
@ -55,6 +59,10 @@ nodeSelector: {}
##
paused: false
## If true, create & use RBAC resources
##
rbacEnable: true
## Number of Prometheus replicas desired
##
replicaCount: 1
@ -205,7 +213,7 @@ serviceMonitors: []
# serverName: ""
## Prometheus StorageSpec for persistent data
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/prometheus.md#storagespec
## Ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
##
storageSpec: {}
# class: default


@ -125,7 +125,7 @@ type PrometheusStatus struct {
UnavailableReplicas int32 `json:"unavailableReplicas"`
}
// AlertingSpec defines paramters for alerting configuration of Prometheus servers.
// AlertingSpec defines parameters for alerting configuration of Prometheus servers.
type AlertingSpec struct {
// AlertmanagerEndpoints Prometheus should fire alerts against.
Alertmanagers []AlertmanagerEndpoints `json:"alertmanagers"`
@ -144,7 +144,7 @@ type StorageSpec struct {
// info: http://kubernetes.io/docs/user-guide/persistent-volumes#resources
// DEPRECATED
Resources v1.ResourceRequirements `json:"resources"`
// Pvc A pvc spec to be used by the Prometheus statefulsets.
// A PVC spec to be used by the Prometheus StatefulSets.
VolumeClaimTemplate v1.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"`
}


@ -35,7 +35,7 @@ import (
// for use.
// TODO(gouthamve): Move to clientset.Get()
func WaitForCRDReady(restClient rest.Interface, crdGroup, crdVersion, crdName string) error {
return wait.Poll(3*time.Second, 30*time.Second, func() (bool, error) {
err := wait.Poll(3*time.Second, 5*time.Minute, func() (bool, error) {
res := restClient.Get().AbsPath("apis", crdGroup, crdVersion, crdName).Do()
err := res.Error()
if err != nil {
@ -57,6 +57,8 @@ func WaitForCRDReady(restClient rest.Interface, crdGroup, crdVersion, crdName st
return true, nil
})
return errors.Wrap(err, fmt.Sprintf("timed out waiting for TPR %s", crdName))
}
// PodRunningAndReady returns whether a pod is running and each container has


@ -357,16 +357,20 @@ func (c *Operator) syncNodeEndpoints() {
},
},
Subsets: []v1.EndpointSubset{
v1.EndpointSubset{
{
Ports: []v1.EndpointPort{
v1.EndpointPort{
{
Name: "https-metrics",
Port: 10250,
},
v1.EndpointPort{
{
Name: "http-metrics",
Port: 10255,
},
{
Name: "cadvisor",
Port: 4194,
},
},
},
},
@ -402,7 +406,7 @@ func (c *Operator) syncNodeEndpoints() {
Type: v1.ServiceTypeClusterIP,
ClusterIP: "None",
Ports: []v1.ServicePort{
v1.ServicePort{
{
Name: "https-metrics",
Port: 10250,
},


@ -53,7 +53,7 @@ func stringMapToMapSlice(m map[string]string) yaml.MapSlice {
func generateConfig(p *v1alpha1.Prometheus, mons map[string]*v1alpha1.ServiceMonitor, ruleConfigMaps int, basicAuthSecrets map[string]BasicAuthCredentials) ([]byte, error) {
versionStr := p.Spec.Version
if versionStr == "" {
versionStr = defaultVersion
versionStr = DefaultVersion
}
version, err := semver.Parse(strings.TrimLeft(versionStr, "v"))
@ -279,13 +279,13 @@ func generateServiceMonitorConfig(version semver.Version, m *v1alpha1.ServiceMon
} else if ep.TargetPort.StrVal != "" {
relabelings = append(relabelings, yaml.MapSlice{
{Key: "action", Value: "keep"},
{Key: "source_labels", Value: []string{"__meta_kubernetes_container_port_name"}},
{Key: "source_labels", Value: []string{"__meta_kubernetes_pod_container_port_name"}},
{Key: "regex", Value: ep.TargetPort.String()},
})
} else if ep.TargetPort.IntVal != 0 {
relabelings = append(relabelings, yaml.MapSlice{
{Key: "action", Value: "keep"},
{Key: "source_labels", Value: []string{"__meta_kubernetes_container_port_number"}},
{Key: "source_labels", Value: []string{"__meta_kubernetes_pod_container_port_number"}},
{Key: "regex", Value: ep.TargetPort.String()},
})
}


@ -37,7 +37,7 @@ import (
const (
governingServiceName = "prometheus-operated"
defaultVersion = "v1.7.0"
DefaultVersion = "v1.7.1"
defaultRetention = "24h"
configMapsFilename = "configmaps.json"
@ -78,7 +78,7 @@ func makeStatefulSet(p v1alpha1.Prometheus, old *v1beta1.StatefulSet, config *Co
p.Spec.BaseImage = config.PrometheusDefaultBaseImage
}
if p.Spec.Version == "" {
p.Spec.Version = defaultVersion
p.Spec.Version = DefaultVersion
}
if p.Spec.Replicas != nil && *p.Spec.Replicas < minReplicas {
p.Spec.Replicas = &minReplicas
@ -123,6 +123,8 @@ func makeStatefulSet(p v1alpha1.Prometheus, old *v1beta1.StatefulSet, config *Co
pvcTemplate := storageSpec.VolumeClaimTemplate
pvcTemplate.Name = volumeName(p.Name)
pvcTemplate.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
pvcTemplate.Spec.Resources = storageSpec.VolumeClaimTemplate.Spec.Resources
pvcTemplate.Spec.Selector = storageSpec.VolumeClaimTemplate.Spec.Selector
statefulset.Spec.VolumeClaimTemplates = append(statefulset.Spec.VolumeClaimTemplates, pvcTemplate)
}


@ -1,38 +0,0 @@
FROM golang:1.8.1-stretch
ENV TERRAFORM_VERSION 0.8.7
ENV KOPS_VERSION 1.5.1
ENV DOCKER_VERSION 1.13.1
RUN echo "deb http://ftp.debian.org/debian wheezy-backports main" >> /etc/apt/sources.list
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
wget \
unzip \
python python-pip jq python-setuptools \
&& rm -rf /var/lib/apt/lists/*
RUN curl https://get.docker.com/builds/Linux/x86_64/docker-${DOCKER_VERSION}.tgz | tar -xvz && \
mv docker/docker /usr/local/bin/docker && \
chmod +x /usr/local/bin/docker && \
rm -r docker
RUN wget -q -O /terraform.zip "https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_amd64.zip" && \
unzip /terraform.zip -d /bin
RUN wget -q -O /kops "https://github.com/kubernetes/kops/releases/download/${KOPS_VERSION}/kops-linux-amd64" && \
chmod +x /kops && \
mv /kops /bin
RUN curl "https://s3.amazonaws.com/aws-cli/awscli-bundle.zip" -o "awscli-bundle.zip" && \
unzip awscli-bundle.zip && \
./awscli-bundle/install -i /usr/local/aws -b /bin/aws && \
rm -r awscli-bundle awscli-bundle.zip
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl && \
chmod +x ./kubectl && \
mv ./kubectl /bin/kubectl
RUN pip install yq


@ -1,55 +0,0 @@
CLUSTER_NAME ?= prom-test-$(shell whoami)
DOMAIN ?= dev.coreos.systems
AMOUNT_NODES = $$(($(shell cat manifests/kops/regular-ig.yaml | yq '.spec.minSize')+1))
path ?= clusters/${CLUSTER_NAME}
build_path := $(path)/.build
aws_region = eu-west-1
KOPS_CMD = kops --state $(shell terraform output -state "$(build_path)/terraform.tfstate" kops_state_bucket)
TERRAFORM_FLAGS = -var "dns_domain=$(DOMAIN)" -var "cluster_name=$(CLUSTER_NAME)" -state "$(build_path)/terraform.tfstate"
all: check-deps gen-ssh cluster wait-for-cluster run-e2e-tests
check-deps:
@which aws || echo "AWS cli is missing."
@which kops || echo "Kops is missing."
@which kubectl || echo "Kubectl is missing."
@which terraform || echo "Terraform is missing."
@which jq || echo "jq is missing."
@which yq || echo "yq is missing."
clean: clean-cluster clean-aws-deps
gen-ssh:
ssh-keygen -t rsa -N "" -f /root/.ssh/id_rsa -q
aws-deps:
AWS_REGION=$(aws_region) terraform apply $(TERRAFORM_FLAGS) ./templates
cluster: aws-deps
$(KOPS_CMD) get cluster | grep -v $(CLUSTER_NAME).$(DOMAIN) || \
$(KOPS_CMD) create cluster \
--name $(CLUSTER_NAME).$(DOMAIN) \
--cloud aws --zones $(aws_region)a --kubernetes-version 1.5.2 \
--master-size t2.medium --yes \
--master-security-groups $(shell terraform output -state "$(build_path)/terraform.tfstate" kops_master_security_group) \
--node-security-groups $(shell terraform output -state "$(build_path)/terraform.tfstate" kops_master_security_group) \
--vpc $(shell terraform output -state "$(build_path)/terraform.tfstate" kops_main_vpc)
EDITOR='./ed.sh manifests/kops/regular-ig.yaml' $(KOPS_CMD) edit ig nodes
$(KOPS_CMD) update cluster --yes
run-e2e-tests:
$(MAKE) -C ../../ e2e-test
wait-for-cluster:
timeout 1800 ./wait-for-cluster.sh $(AMOUNT_NODES)
clean-cluster:
$(KOPS_CMD) delete cluster --name $(CLUSTER_NAME).$(DOMAIN) --yes
clean-aws-deps:
AWS_REGION=$(aws_region) terraform destroy -force $(TERRAFORM_FLAGS) ./templates
rm -f $(build_path)/terraform.tfstate*
.PHONY: all check-deps clean gen-ssh aws-deps cluster run-e2e-tests wait-for-cluster clean-cluster clean-aws-deps


@ -0,0 +1,8 @@
FROM golang:1.8-stretch
ENV DOCKER_VERSION 1.13.1
RUN curl https://get.docker.com/builds/Linux/x86_64/docker-${DOCKER_VERSION}.tgz | tar -xvz && \
mv docker/docker /usr/local/bin/docker && \
chmod +x /usr/local/bin/docker && \
rm -r docker


@ -1,14 +0,0 @@
#!/bin/bash
# Kops requires user input through an editor to update a ressource. Instead of
# interacting with an editor we give Kops a fake editor via the 'EDITOR' env
# var. This editor always writes the content of file '$1' into file '$2'. In the
# Makefile before calling 'kops edit ig nodes' we set the 'EDITOR' env var to
# this script with the wanted file as the first argument. The second argument
# which is the file that is supposed to be edited by the user is passed in by
# kops later.
WANTED_FILE=$1
TO_EDIT_FILE=$2
cat $WANTED_FILE > $TO_EDIT_FILE


@ -0,0 +1,13 @@
FROM golang:1.8-stretch
RUN echo "deb http://ftp.debian.org/debian wheezy-backports main" >> /etc/apt/sources.list
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
unzip \
python python-pip jq \
&& rm -rf /var/lib/apt/lists/*
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl && \
chmod +x ./kubectl && \
mv ./kubectl /bin/kubectl


@ -0,0 +1,230 @@
// The e-mail address used to login as the admin user to the Tectonic Console.
//
// Note: This field MUST be set manually prior to creating the cluster.
tectonic_admin_email = "monitoring@coreos.com"
// The bcrypt hash of admin user password to login to the Tectonic Console.
// Use the bcrypt-hash tool (https://github.com/coreos/bcrypt-tool/releases/tag/v1.0.0) to generate it.
//
// Note: This field MUST be set manually prior to creating the cluster.
tectonic_admin_password_hash = ""
// (optional) Extra AWS tags to be applied to created autoscaling group resources.
// This is a list of maps having the keys `key`, `value` and `propagate_at_launch`.
//
// Example: `[ { key = "foo", value = "bar", propagate_at_launch = true } ]`
// tectonic_autoscaling_group_extra_tags = ""
// Number of Availability Zones your EC2 instances will be deployed across.
// This should be less than or equal to the total number available in the region.
// Be aware that some regions only have 2.
// If set worker and master subnet CIDRs are calculated automatically.
//
// Note:
// This field MUST be set manually prior to creating the cluster.
// It MUST NOT be set if availability zones CIDRs are configured using `tectonic_aws_master_custom_subnets` and `tectonic_aws_worker_custom_subnets`.
tectonic_aws_az_count = "2"
// Instance size for the etcd node(s). Example: `t2.medium`.
tectonic_aws_etcd_ec2_type = "t2.medium"
// The amount of provisioned IOPS for the root block device of etcd nodes.
tectonic_aws_etcd_root_volume_iops = "100"
// The size of the volume in gigabytes for the root block device of etcd nodes.
tectonic_aws_etcd_root_volume_size = "30"
// The type of volume for the root block device of etcd nodes.
tectonic_aws_etcd_root_volume_type = "gp2"
// (optional) List of subnet IDs within an existing VPC to deploy master nodes into.
// Required to use an existing VPC and the list must match the AZ count.
//
// Example: `["subnet-111111", "subnet-222222", "subnet-333333"]`
// tectonic_aws_external_master_subnet_ids = ""
// (optional) ID of an existing VPC to launch nodes into.
// If unset a new VPC is created.
//
// Example: `vpc-123456`
// tectonic_aws_external_vpc_id = ""
// If set to true, create public facing ingress resources (ELB, A-records).
// If set to false, a "private" cluster will be created with an internal ELB only.
tectonic_aws_external_vpc_public = true
// (optional) List of subnet IDs within an existing VPC to deploy worker nodes into.
// Required to use an existing VPC and the list must match the AZ count.
//
// Example: `["subnet-111111", "subnet-222222", "subnet-333333"]`
// tectonic_aws_external_worker_subnet_ids = ""
// (optional) Extra AWS tags to be applied to created resources.
// tectonic_aws_extra_tags = ""
// (optional) This configures master availability zones and their corresponding subnet CIDRs directly.
//
// Example:
// `{ eu-west-1a = "10.0.0.0/20", eu-west-1b = "10.0.16.0/20" }`
//
// Note that `tectonic_aws_az_count` must be unset if this is specified.
// tectonic_aws_master_custom_subnets = ""
// Instance size for the master node(s). Example: `t2.medium`.
tectonic_aws_master_ec2_type = "t2.medium"
// The amount of provisioned IOPS for the root block device of master nodes.
tectonic_aws_master_root_volume_iops = "100"
// The size of the volume in gigabytes for the root block device of master nodes.
tectonic_aws_master_root_volume_size = "30"
// The type of volume for the root block device of master nodes.
tectonic_aws_master_root_volume_type = "gp2"
// The target AWS region for the cluster.
tectonic_aws_region = "eu-west-2"
// Name of an SSH key located within the AWS region. Example: coreos-user.
tectonic_aws_ssh_key = "jenkins-tpo-ssh-key"
// Block of IP addresses used by the VPC.
// This should not overlap with any other networks, such as a private datacenter connected via Direct Connect.
tectonic_aws_vpc_cidr_block = "10.0.0.0/16"
// (optional) This configures worker availability zones and their corresponding subnet CIDRs directly.
//
// Example: `{ eu-west-1a = "10.0.64.0/20", eu-west-1b = "10.0.80.0/20" }`
//
// Note that `tectonic_aws_az_count` must be unset if this is specified.
// tectonic_aws_worker_custom_subnets = ""
// Instance size for the worker node(s). Example: `t2.medium`.
tectonic_aws_worker_ec2_type = "t2.medium"
// The amount of provisioned IOPS for the root block device of worker nodes.
tectonic_aws_worker_root_volume_iops = "100"
// The size of the volume in gigabytes for the root block device of worker nodes.
tectonic_aws_worker_root_volume_size = "30"
// The type of volume for the root block device of worker nodes.
tectonic_aws_worker_root_volume_type = "gp2"
// The base DNS domain of the cluster.
//
// Example: `openstack.dev.coreos.systems`.
//
// Note: This field MUST be set manually prior to creating the cluster.
// This applies only to cloud platforms.
tectonic_base_domain = "dev.coreos.systems"
// (optional) The content of the PEM-encoded CA certificate, used to generate Tectonic Console's server certificate.
// If left blank, a CA certificate will be automatically generated.
// tectonic_ca_cert = ""
// (optional) The content of the PEM-encoded CA key, used to generate Tectonic Console's server certificate.
// This field is mandatory if `tectonic_ca_cert` is set.
// tectonic_ca_key = ""
// (optional) The algorithm used to generate tectonic_ca_key.
// The default value is currently recommend.
// This field is mandatory if `tectonic_ca_cert` is set.
// tectonic_ca_key_alg = "RSA"
// The Container Linux update channel.
//
// Examples: `stable`, `beta`, `alpha`
tectonic_cl_channel = "stable"
// This declares the IP range to assign Kubernetes pod IPs in CIDR notation.
tectonic_cluster_cidr = "10.2.0.0/16"
// The name of the cluster.
// If used in a cloud-environment, this will be prepended to `tectonic_base_domain` resulting in the URL to the Tectonic console.
//
// Note: This field MUST be set manually prior to creating the cluster.
// Set via env variable
//tectonic_cluster_name = ""
// (optional) DNS prefix used to construct the console and API server endpoints.
// tectonic_dns_name = ""
// (optional) The path of the file containing the CA certificate for TLS communication with etcd.
//
// Note: This works only when used in conjunction with an external etcd cluster.
// If set, the variables `tectonic_etcd_servers`, `tectonic_etcd_client_cert_path`, and `tectonic_etcd_client_key_path` must also be set.
// tectonic_etcd_ca_cert_path = ""
// (optional) The path of the file containing the client certificate for TLS communication with etcd.
//
// Note: This works only when used in conjunction with an external etcd cluster.
// If set, the variables `tectonic_etcd_servers`, `tectonic_etcd_ca_cert_path`, and `tectonic_etcd_client_key_path` must also be set.
// tectonic_etcd_client_cert_path = ""
// (optional) The path of the file containing the client key for TLS communication with etcd.
//
// Note: This works only when used in conjunction with an external etcd cluster.
// If set, the variables `tectonic_etcd_servers`, `tectonic_etcd_ca_cert_path`, and `tectonic_etcd_client_cert_path` must also be set.
// tectonic_etcd_client_key_path = ""
// The number of etcd nodes to be created.
// If set to zero, the count of etcd nodes will be determined automatically.
//
// Note: This is currently only supported on AWS.
tectonic_etcd_count = "0"
// (optional) List of external etcd v3 servers to connect with (hostnames/IPs only).
// Needs to be set if using an external etcd cluster.
//
// Example: `["etcd1", "etcd2", "etcd3"]`
// tectonic_etcd_servers = ""
// If set to true, experimental Tectonic assets are being deployed.
tectonic_experimental = false
// The Kubernetes service IP used to reach kube-apiserver inside the cluster
// as returned by `kubectl -n default get service kubernetes`.
tectonic_kube_apiserver_service_ip = "10.3.0.1"
// The Kubernetes service IP used to reach kube-dns inside the cluster
// as returned by `kubectl -n kube-system get service kube-dns`.
tectonic_kube_dns_service_ip = "10.3.0.10"
// The Kubernetes service IP used to reach self-hosted etcd inside the cluster
// as returned by `kubectl -n kube-system get service etcd-service`.
tectonic_kube_etcd_service_ip = "10.3.0.15"
// The path to the tectonic licence file.
//
// Note: This field MUST be set manually prior to creating the cluster.
tectonic_license_path = "/go/src/github.com/coreos/tectonic-installer/license"
// The number of master nodes to be created.
// This applies only to cloud platforms.
tectonic_master_count = "1"
// The path the pull secret file in JSON format.
//
// Note: This field MUST be set manually prior to creating the cluster.
tectonic_pull_secret_path = "/go/src/github.com/coreos/tectonic-installer/secret"
// This declares the IP range to assign Kubernetes service cluster IPs in CIDR notation.
tectonic_service_cidr = "10.3.0.0/16"
// If set to true, a vanilla Kubernetes cluster will be deployed, omitting any Tectonic assets.
tectonic_vanilla_k8s = true
// The number of worker nodes to be created.
// This applies only to cloud platforms.
tectonic_worker_count = "3"
tectonic_autoscaling_group_extra_tags = [
{ key = "createdBy", value = "team-monitoring@coreos.com", propagate_at_launch = true },
{ key = "expirationDate", value = "2017-01-01", propagate_at_launch = true }
]
tectonic_aws_extra_tags = {
"createdBy"="team-monitoring@coreos.com",
"expirationDate"="2017-01-01"
}


@ -1,14 +0,0 @@
apiVersion: kops/v1alpha2
kind: InstanceGroup
metadata:
name: nodes
spec:
associatePublicIp: true
machineType: t2.medium
maxSize: 2
minSize: 2
nodeLabels:
isolation: none
role: Node
zones:
- eu-west-1a


@ -8,14 +8,26 @@ set -u
# print each command before executing it
set -x
PO_GOPATH=/go/src/github.com/coreos/prometheus-operator
export {TF_GET_OPTIONS,TF_PLAN_OPTIONS,TF_APPLY_OPTIONS,TF_DESTROY_OPTIONS}="-no-color"
CLUSTER="po-$(git rev-parse --short HEAD)-${BUILD_ID}"
TF_VAR_tectonic_cluster_name="${CLUSTER}"
TF_VAR_tectonic_dns_name="${CLUSTER}"
TECTONIC_INSTALLER_DIR=/go/src/github.com/coreos/tectonic-installer
docker run \
--rm \
-e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY \
-v $PWD:$PO_GOPATH \
-w $PO_GOPATH/scripts/jenkins \
cluster-setup-env \
/bin/bash -c "make clean"
-v $PWD/build/:$TECTONIC_INSTALLER_DIR/build/ \
-v ~/.ssh:$HOME/.ssh \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e TF_GET_OPTIONS \
-e TF_DESTROY_OPTIONS \
-e CLUSTER=${CLUSTER} \
-w $TECTONIC_INSTALLER_DIR \
-e TF_VAR_tectonic_cluster_name=${TF_VAR_tectonic_cluster_name} \
-e TF_VAR_tectonic_dns_name=${TF_VAR_tectonic_dns_name} \
quay.io/coreos/tectonic-installer:master \
/bin/bash -c "make destroy || make destroy || make destroy"
docker rmi quay.io/coreos/prometheus-operator-dev:$BUILD_ID


@ -8,25 +8,79 @@ set -u
# print each command before executing it
set -x
# Push docker image
DOCKER_SOCKET=/var/run/docker.sock
PO_QUAY_REPO=quay.io/coreos/prometheus-operator-dev
docker build -t cluster-setup-env scripts/jenkins/.
docker build -t docker-golang-env -f scripts/jenkins/docker-golang-env/Dockerfile .
docker run \
--rm \
-v $PWD:$PWD -v $DOCKER_SOCKET:$DOCKER_SOCKET \
cluster-setup-env \
docker-golang-env \
/bin/bash -c "cd $PWD && make crossbuild"
docker build -t $PO_QUAY_REPO:$BUILD_ID .
docker login -u="$QUAY_ROBOT_USERNAME" -p="$QUAY_ROBOT_SECRET" quay.io
docker push $PO_QUAY_REPO:$BUILD_ID
# Bring up k8s cluster
export {TF_GET_OPTIONS,TF_PLAN_OPTIONS,TF_APPLY_OPTIONS,TF_DESTROY_OPTIONS}="-no-color"
CLUSTER="po-$(git rev-parse --short HEAD)-${BUILD_ID}"
TF_VAR_tectonic_cluster_name="${CLUSTER}"
TF_VAR_tectonic_dns_name="${CLUSTER}"
TECTONIC_INSTALLER_DIR=/go/src/github.com/coreos/tectonic-installer
PO_DIR=/go/src/github.com/coreos/prometheus-operator
KUBECONFIG="${PO_DIR}/build/${CLUSTER}/generated/auth/kubeconfig"
mkdir -p build/${CLUSTER}
cp ${WORKSPACE}/scripts/jenkins/kubernetes-vanilla.tfvars build/${CLUSTER}/terraform.tfvars
docker run \
--rm \
-e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY \
-e REPO=$PO_QUAY_REPO -e TAG=$BUILD_ID \
-v $PWD:/go/src/github.com/coreos/prometheus-operator \
-w /go/src/github.com/coreos/prometheus-operator/scripts/jenkins \
cluster-setup-env \
/bin/bash -c "make"
-v $PWD/build/:$TECTONIC_INSTALLER_DIR/build/ \
-v ~/.ssh:$HOME/.ssh \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e TF_GET_OPTIONS \
-e TF_PLAN_OPTIONS \
-e TF_APPLY_OPTIONS \
-e CLUSTER=${CLUSTER} \
-e TF_VAR_tectonic_cluster_name=${TF_VAR_tectonic_cluster_name} \
-e TF_VAR_tectonic_dns_name=${TF_VAR_tectonic_dns_name} \
-w $TECTONIC_INSTALLER_DIR \
quay.io/coreos/tectonic-installer:master \
/bin/bash -c "touch license secret && make plan && make apply"
docker build \
-t kubectl-env \
-f scripts/jenkins/kubectl-env/Dockerfile \
.
sleep 5m
docker run \
--rm \
-v $PWD:$PO_DIR \
-w $PO_DIR \
-e KUBECONFIG=${KUBECONFIG} \
kubectl-env \
/bin/bash -c "timeout 900 ./scripts/jenkins/wait-for-cluster.sh 4"
# Run e2e tests
docker run \
--rm \
-v $PWD:$PO_DIR \
-w $PO_DIR \
-e KUBECONFIG=${KUBECONFIG} \
-e REPO=$PO_QUAY_REPO \
-e TAG=$BUILD_ID \
kubectl-env \
/bin/bash -c "make e2e-test"


@ -1,56 +0,0 @@
variable "dns_domain" {}
variable "cluster_name" {}
data "aws_route53_zone" "monitoring_zone" {
name = "${var.dns_domain}"
}
resource "aws_route53_zone" "cluster_zone" {
name = "${var.cluster_name}.${var.dns_domain}"
}
resource "aws_route53_record" "cluster_zone_record" {
name = "${var.cluster_name}.${var.dns_domain}"
zone_id = "${data.aws_route53_zone.monitoring_zone.zone_id}"
type = "NS"
ttl = "300"
records = ["${aws_route53_zone.cluster_zone.name_servers}"]
}
resource "aws_s3_bucket" "kops-state" {
bucket = "kops-${sha1("${var.cluster_name}-${var.dns_domain}")}"
}
resource "aws_security_group" "allow_all" {
name = "allow_all"
description = "Allow all inbound traffic"
vpc_id = "${aws_vpc.main.id}"
ingress {
from_port = 30000
to_port = 32767
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
tags {
Name = "allow_all"
}
}
resource "aws_vpc" "main" {
cidr_block = "172.20.0.0/16"
}
resource "aws_internet_gateway" "gw" {
vpc_id = "${aws_vpc.main.id}"
}


@ -1,11 +0,0 @@
output "kops_state_bucket" {
value = "s3://${aws_s3_bucket.kops-state.id}"
}
output "kops_master_security_group" {
value = "${aws_security_group.allow_all.id}"
}
output "kops_main_vpc" {
value = "${aws_vpc.main.id}"
}


@ -16,10 +16,8 @@ package e2e
import (
"fmt"
"net/http"
"strconv"
"testing"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/pkg/api/v1"
@ -34,6 +32,7 @@ func TestAlertmanagerCreateDeleteCluster(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -52,6 +51,7 @@ func TestAlertmanagerScaling(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -74,6 +74,7 @@ func TestAlertmanagerVersionMigration(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -94,38 +95,13 @@ func TestAlertmanagerVersionMigration(t *testing.T) {
}
}
func TestExposingAlertmanagerWithNodePort(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
alertmanager := framework.MakeBasicAlertmanager("test-alertmanager", 1)
alertmanagerService := framework.MakeAlertmanagerNodePortService(alertmanager.Name, "nodeport-service", 30903)
if err := framework.CreateAlertmanagerAndWaitUntilReady(ns, alertmanager); err != nil {
t.Fatal(err)
}
if finalizerFn, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, alertmanagerService); err != nil {
t.Fatal(err)
} else {
ctx.AddFinalizerFn(finalizerFn)
}
resp, err := http.Get(fmt.Sprintf("http://%s:30903/", framework.ClusterIP))
if err != nil {
t.Fatal("Retrieving alertmanager landing page failed with error: ", err)
} else if resp.StatusCode != 200 {
t.Fatal("Retrieving alertmanager landing page failed with http status code: ", resp.StatusCode)
}
}
func TestExposingAlertmanagerWithKubernetesAPI(t *testing.T) {
t.Parallel()
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
alertmanager := framework.MakeBasicAlertmanager("test-alertmanager", 1)
alertmanagerService := framework.MakeAlertmanagerService(alertmanager.Name, "alertmanager-service", v1.ServiceTypeClusterIP)
@ -146,50 +122,13 @@ func TestExposingAlertmanagerWithKubernetesAPI(t *testing.T) {
}
}
func TestExposingAlertmanagerWithIngress(t *testing.T) {
t.Parallel()
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
alertmanager := framework.MakeBasicAlertmanager("main", 1)
alertmanagerService := framework.MakeAlertmanagerService(alertmanager.Name, "test-group", v1.ServiceTypeClusterIP)
ingress := testFramework.MakeBasicIngress(alertmanagerService.Name, 9093)
if err := testFramework.SetupNginxIngressControllerIncDefaultBackend(framework.KubeClient, ns); err != nil {
t.Fatal(err)
}
if err := framework.CreateAlertmanagerAndWaitUntilReady(ns, alertmanager); err != nil {
t.Fatal(err)
}
if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, alertmanagerService); err != nil {
t.Fatal(err)
}
if err := testFramework.CreateIngress(framework.KubeClient, ns, ingress); err != nil {
t.Fatal(err)
}
ip, err := testFramework.GetIngressIP(framework.KubeClient, ns, ingress.Name)
if err != nil {
t.Fatal(err)
}
err = testFramework.WaitForHTTPSuccessStatusCode(time.Minute, fmt.Sprintf("http://%s/metrics", *ip))
if err != nil {
t.Fatal(err)
}
}
func TestMeshInitialization(t *testing.T) {
t.Parallel()
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
var amountAlertmanagers int32 = 3
alertmanager := &v1alpha1.Alertmanager{
@ -226,6 +165,7 @@ func TestAlertmanagerReloadConfig(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
alertmanager := framework.MakeBasicAlertmanager("reload-config", 1)


@ -56,12 +56,6 @@ func (f *Framework) MakeBasicAlertmanager(name string, replicas int32) *v1alpha1
}
}
func (f *Framework) MakeAlertmanagerNodePortService(name, group string, nodePort int32) *v1.Service {
aMService := f.MakeAlertmanagerService(name, group, v1.ServiceTypeNodePort)
aMService.Spec.Ports[0].NodePort = nodePort
return aMService
}
func (f *Framework) MakeAlertmanagerService(name, group string, serviceType v1.ServiceType) *v1.Service {
service := &v1.Service{
ObjectMeta: metav1.ObjectMeta{


@ -21,29 +21,32 @@ import (
rbacv1alpha1 "k8s.io/client-go/pkg/apis/rbac/v1alpha1"
)
func CreateClusterRoleBinding(kubeClient kubernetes.Interface, relativePath string) error {
func CreateClusterRoleBinding(kubeClient kubernetes.Interface, ns string, relativePath string) (finalizerFn, error) {
finalizerFn := func() error { return DeleteClusterRoleBinding(kubeClient, relativePath) }
clusterRoleBinding, err := parseClusterRoleBindingYaml(relativePath)
if err != nil {
return err
return finalizerFn, err
}
clusterRoleBinding.Subjects[0].Namespace = ns
_, err = kubeClient.RbacV1alpha1().ClusterRoleBindings().Get(clusterRoleBinding.Name, metav1.GetOptions{})
if err == nil {
// ClusterRoleBinding already exists -> Update
_, err = kubeClient.RbacV1alpha1().ClusterRoleBindings().Update(clusterRoleBinding)
if err != nil {
return err
return finalizerFn, err
}
} else {
// ClusterRoleBinding doesn't exist -> Create
_, err = kubeClient.RbacV1alpha1().ClusterRoleBindings().Create(clusterRoleBinding)
if err != nil {
return err
return finalizerFn, err
}
}
return nil
return finalizerFn, err
}
func DeleteClusterRoleBinding(kubeClient kubernetes.Interface, relativePath string) error {

View file

@ -15,6 +15,8 @@
package framework
import (
"fmt"
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/kubernetes"
@ -28,7 +30,7 @@ func MakeDeployment(pathToYaml string) (*v1beta1.Deployment, error) {
}
tectonicPromOp := v1beta1.Deployment{}
if err := yaml.NewYAMLOrJSONDecoder(manifest, 100).Decode(&tectonicPromOp); err != nil {
return nil, err
return nil, errors.Wrap(err, fmt.Sprintf("failed to decode file %s", pathToYaml))
}
return &tectonicPromOp, nil
@ -37,7 +39,7 @@ func MakeDeployment(pathToYaml string) (*v1beta1.Deployment, error) {
func CreateDeployment(kubeClient kubernetes.Interface, namespace string, d *v1beta1.Deployment) error {
_, err := kubeClient.Extensions().Deployments(namespace).Create(d)
if err != nil {
return err
return errors.Wrap(err, fmt.Sprintf("failed to create deployment %s", d.Name))
}
return nil
}

View file

@ -16,6 +16,7 @@ package framework
import (
"net/http"
"testing"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -37,30 +38,29 @@ type Framework struct {
MasterHost string
Namespace *v1.Namespace
OperatorPod *v1.Pod
ClusterIP string
DefaultTimeout time.Duration
}
// New sets up a test framework and returns it.
func New(ns, kubeconfig, opImage, ip string) (*Framework, error) {
func New(ns, kubeconfig, opImage string) (*Framework, error) {
config, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
if err != nil {
return nil, err
return nil, errors.Wrap(err, "build config from flags failed")
}
cli, err := kubernetes.NewForConfig(config)
if err != nil {
return nil, err
return nil, errors.Wrap(err, "creating new kube-client failed")
}
httpc := cli.CoreV1().RESTClient().(*rest.RESTClient).Client
if err != nil {
return nil, err
return nil, errors.Wrap(err, "creating http-client failed")
}
mclient, err := v1alpha1.NewForConfig(config)
if err != nil {
return nil, err
return nil, errors.Wrap(err, "creating monitoring client failed")
}
namespace, err := CreateNamespace(cli, ns)
@ -74,13 +74,12 @@ func New(ns, kubeconfig, opImage, ip string) (*Framework, error) {
MonClient: mclient,
HTTPClient: httpc,
Namespace: namespace,
ClusterIP: ip,
DefaultTimeout: time.Minute,
}
err = f.Setup(opImage)
if err != nil {
return nil, err
return nil, errors.Wrap(err, "setup test environment failed")
}
return f, nil
@ -88,13 +87,29 @@ func New(ns, kubeconfig, opImage, ip string) (*Framework, error) {
func (f *Framework) Setup(opImage string) error {
if err := f.setupPrometheusOperator(opImage); err != nil {
return err
return errors.Wrap(err, "setup prometheus operator failed")
}
return nil
}
func (f *Framework) setupPrometheusOperator(opImage string) error {
deploy, err := MakeDeployment("../../example/non-rbac/prometheus-operator.yaml")
if _, err := CreateServiceAccount(f.KubeClient, f.Namespace.Name, "../../example/rbac/prometheus-operator/prometheus-operator-service-account.yaml"); err != nil {
return errors.Wrap(err, "failed to create prometheus operator service account")
}
if err := CreateClusterRole(f.KubeClient, "../../example/rbac/prometheus-operator/prometheus-operator-cluster-role.yaml"); err != nil {
return errors.Wrap(err, "failed to create prometheus cluster role")
}
if _, err := CreateClusterRoleBinding(f.KubeClient, f.Namespace.Name, "../../example/rbac/prometheus-operator/prometheus-operator-cluster-role-binding.yaml"); err != nil {
return errors.Wrap(err, "failed to create prometheus cluster role binding")
}
if err := CreateClusterRole(f.KubeClient, "../../example/rbac/prometheus/prometheus-cluster-role.yaml"); err != nil {
return errors.Wrap(err, "failed to create prometheus cluster role")
}
deploy, err := MakeDeployment("../../example/rbac/prometheus-operator/prometheus-operator.yaml")
if err != nil {
return err
}
@ -134,6 +149,20 @@ func (f *Framework) setupPrometheusOperator(opImage string) error {
return k8sutil.WaitForCRDReady(f.KubeClient.Core().RESTClient(), v1alpha1.Group, v1alpha1.Version, v1alpha1.AlertmanagerName)
}
func (ctx *TestCtx) SetupPrometheusRBAC(t *testing.T, ns string, kubeClient kubernetes.Interface) {
if finalizerFn, err := CreateServiceAccount(kubeClient, ns, "../../example/rbac/prometheus/prometheus-service-account.yaml"); err != nil {
t.Fatal(errors.Wrap(err, "failed to create prometheus service account"))
} else {
ctx.AddFinalizerFn(finalizerFn)
}
if finalizerFn, err := CreateRoleBinding(kubeClient, ns, "framework/ressources/prometheus-role-binding.yml"); err != nil {
t.Fatal(errors.Wrap(err, "failed to create prometheus role binding"))
} else {
ctx.AddFinalizerFn(finalizerFn)
}
}
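// Illustrative sketch (hypothetical helper, not taken from the operator's test
// framework): new helpers are expected to follow the same convention as the
// calls above and return a finalizerFn (assumed here to be the framework's
// `func() error` type) so callers can register cleanup via ctx.AddFinalizerFn.
// The ConfigMap resource and helper name below are assumptions.
func CreateExampleConfigMap(kubeClient kubernetes.Interface, ns, name string) (finalizerFn, error) {
	finalizer := func() error {
		// delete the resource again when the test context is cleaned up
		return kubeClient.CoreV1().ConfigMaps(ns).Delete(name, nil)
	}
	_, err := kubeClient.CoreV1().ConfigMaps(ns).Create(&v1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{Name: name},
	})
	return finalizer, err
}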
// Teardown tears down a previously initialized test environment.
func (f *Framework) Teardown() error {
if err := f.KubeClient.Core().Services(f.Namespace.Name).Delete("prometheus-operated", nil); err != nil && !k8sutil.IsResourceNotFoundError(err) {

View file

@ -34,12 +34,12 @@ import (
func PathToOSFile(relativPath string) (*os.File, error) {
path, err := filepath.Abs(relativPath)
if err != nil {
return nil, err
return nil, errors.Wrap(err, fmt.Sprintf("failed to generate absolute file path of %s", relativPath))
}
manifest, err := os.Open(path)
if err != nil {
return nil, err
return nil, errors.Wrap(err, fmt.Sprintf("failed to open file %s", path))
}
return manifest, nil

View file

@ -17,8 +17,8 @@ package framework
import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"time"
"k8s.io/apimachinery/pkg/api/resource"
@ -40,11 +40,13 @@ func (f *Framework) MakeBasicPrometheus(ns, name, group string, replicas int32)
},
Spec: v1alpha1.PrometheusSpec{
Replicas: &replicas,
Version: prometheus.DefaultVersion,
ServiceMonitorSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"group": group,
},
},
ServiceAccountName: "prometheus",
RuleSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"role": "rulefile",
@ -95,12 +97,6 @@ func (f *Framework) MakeBasicServiceMonitor(name string) *v1alpha1.ServiceMonito
}
}
func (f *Framework) MakeBasicPrometheusNodePortService(name, group string, nodePort int32) *v1.Service {
pService := f.MakePrometheusService(name, group, v1.ServiceTypeNodePort)
pService.Spec.Ports[0].NodePort = nodePort
return pService
}
func (f *Framework) MakePrometheusService(name, group string, serviceType v1.ServiceType) *v1.Service {
service := &v1.Service{
ObjectMeta: metav1.ObjectMeta{
@ -202,12 +198,12 @@ func promImage(version string) string {
return fmt.Sprintf("quay.io/prometheus/prometheus:%s", version)
}
func (f *Framework) WaitForTargets(amount int) error {
func (f *Framework) WaitForTargets(ns, svcName string, amount int) error {
var targets []*Target
if err := wait.Poll(time.Second, time.Minute*10, func() (bool, error) {
var err error
targets, err = f.GetActiveTargets()
targets, err = f.GetActiveTargets(ns, svcName)
if err != nil {
return false, err
}
@ -224,15 +220,20 @@ func (f *Framework) WaitForTargets(amount int) error {
return nil
}
func (f *Framework) GetActiveTargets() ([]*Target, error) {
resp, err := http.Get(fmt.Sprintf("http://%s:30900/api/v1/targets", f.ClusterIP))
func (f *Framework) QueryPrometheusSVC(ns, svcName, endpoint string, query map[string]string) (io.ReadCloser, error) {
ProxyGet := f.KubeClient.CoreV1().Services(ns).ProxyGet
request := ProxyGet("", svcName, "web", endpoint, query)
return request.Stream()
}
func (f *Framework) GetActiveTargets(ns, svcName string) ([]*Target, error) {
response, err := f.QueryPrometheusSVC(ns, svcName, "/api/v1/targets", map[string]string{})
if err != nil {
return nil, err
}
defer resp.Body.Close()
rt := prometheusTargetAPIResponse{}
if err := json.NewDecoder(resp.Body).Decode(&rt); err != nil {
if err := json.NewDecoder(response).Decode(&rt); err != nil {
return nil, err
}
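// Usage sketch (assumptions: a Prometheus Service named svcName exists in ns
// and exposes a port named "web", as QueryPrometheusSVC above requires):
//
//	body, err := f.QueryPrometheusSVC(ns, svcName, "/api/v1/query", map[string]string{"query": "up"})
//	if err != nil {
//		// handle error
//	}
//	defer body.Close()
//	// body is an io.ReadCloser; decode it, e.g. json.NewDecoder(body).Decode(&result)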

View file

@ -0,0 +1,11 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
name: prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: prometheus

View file

@ -0,0 +1,60 @@
// Copyright 2017 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package framework
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/kubernetes"
rbacv1alpha1 "k8s.io/client-go/pkg/apis/rbac/v1alpha1"
)
func CreateRoleBinding(kubeClient kubernetes.Interface, ns string, relativePath string) (finalizerFn, error) {
finalizerFn := func() error { return DeleteRoleBinding(kubeClient, ns, relativePath) }
roleBinding, err := parseRoleBindingYaml(relativePath)
if err != nil {
return finalizerFn, err
}
_, err = kubeClient.RbacV1alpha1().RoleBindings(ns).Create(roleBinding)
return finalizerFn, err
}
func DeleteRoleBinding(kubeClient kubernetes.Interface, ns string, relativePath string) error {
roleBinding, err := parseRoleBindingYaml(relativePath)
if err != nil {
return err
}
if err := kubeClient.RbacV1alpha1().RoleBindings(ns).Delete(roleBinding.Name, &metav1.DeleteOptions{}); err != nil {
return err
}
return nil
}
func parseRoleBindingYaml(relativePath string) (*rbacv1alpha1.RoleBinding, error) {
manifest, err := PathToOSFile(relativePath)
if err != nil {
return nil, err
}
roleBinding := rbacv1alpha1.RoleBinding{}
if err := yaml.NewYAMLOrJSONDecoder(manifest, 100).Decode(&roleBinding); err != nil {
return nil, err
}
return &roleBinding, nil
}

View file

@ -20,21 +20,40 @@ import (
"k8s.io/client-go/pkg/api/v1"
)
func CreateServiceAccount(kubeClient kubernetes.Interface, namespace string, relativPath string) error {
func CreateServiceAccount(kubeClient kubernetes.Interface, namespace string, relativPath string) (finalizerFn, error) {
finalizerFn := func() error { return DeleteServiceAccount(kubeClient, namespace, relativPath) }
serviceAccount, err := parseServiceAccountYaml(relativPath)
if err != nil {
return finalizerFn, err
}
_, err = kubeClient.CoreV1().ServiceAccounts(namespace).Create(serviceAccount)
if err != nil {
return finalizerFn, err
}
return finalizerFn, nil
}
func parseServiceAccountYaml(relativPath string) (*v1.ServiceAccount, error) {
manifest, err := PathToOSFile(relativPath)
if err != nil {
return err
return nil, err
}
serviceAccount := v1.ServiceAccount{}
if err := yaml.NewYAMLOrJSONDecoder(manifest, 100).Decode(&serviceAccount); err != nil {
return err
return nil, err
}
_, err = kubeClient.CoreV1().ServiceAccounts(namespace).Create(&serviceAccount)
return &serviceAccount, nil
}
func DeleteServiceAccount(kubeClient kubernetes.Interface, namespace string, relativPath string) error {
serviceAccount, err := parseServiceAccountYaml(relativPath)
if err != nil {
return err
}
return nil
return kubeClient.CoreV1().ServiceAccounts(namespace).Delete(serviceAccount.Name, nil)
}

View file

@ -32,7 +32,6 @@ func TestMain(m *testing.M) {
kubeconfig := flag.String("kubeconfig", "", "kube config path, e.g. $HOME/.kube/config")
opImage := flag.String("operator-image", "", "operator image, e.g. quay.io/coreos/prometheus-operator")
ns := flag.String("namespace", "prometheus-operator-e2e-tests", "e2e test namespace")
ip := flag.String("cluster-ip", "", "ip of the kubernetes cluster to use for external requests")
flag.Parse()
var (
@ -40,7 +39,7 @@ func TestMain(m *testing.M) {
code int = 0
)
if framework, err = operatorFramework.New(*ns, *kubeconfig, *opImage, *ip); err != nil {
if framework, err = operatorFramework.New(*ns, *kubeconfig, *opImage); err != nil {
log.Printf("failed to setup framework: %v\n", err)
os.Exit(1)
}

View file

@ -18,7 +18,6 @@ import (
"encoding/json"
"fmt"
"log"
"net/http"
"reflect"
"sort"
"strings"
@ -27,6 +26,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/pkg/api/v1"
@ -43,6 +43,7 @@ func TestPrometheusCreateDeleteCluster(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -64,6 +65,7 @@ func TestPrometheusScaleUpDownCluster(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -86,6 +88,7 @@ func TestPrometheusVersionMigration(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
startVersion := prometheus.CompatibilityMatrix[0]
@ -114,6 +117,7 @@ func TestPrometheusResourceUpdate(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -162,6 +166,7 @@ func TestPrometheusReloadConfig(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
replicas := int32(1)
@ -201,7 +206,7 @@ scrape_configs:
},
}
svc := framework.MakeBasicPrometheusNodePortService(name, "reloadconfig-group", 30900)
svc := framework.MakePrometheusService(p.Name, "not-relevant", v1.ServiceTypeClusterIP)
if _, err := framework.KubeClient.CoreV1().Secrets(ns).Create(cfg); err != nil {
t.Fatal(err)
@ -217,7 +222,7 @@ scrape_configs:
ctx.AddFinalizerFn(finalizerFn)
}
if err := framework.WaitForTargets(1); err != nil {
if err := framework.WaitForTargets(ns, svc.Name, 1); err != nil {
t.Fatal(err)
}
@ -238,7 +243,7 @@ scrape_configs:
t.Fatal(err)
}
if err := framework.WaitForTargets(2); err != nil {
if err := framework.WaitForTargets(ns, svc.Name, 2); err != nil {
t.Fatal(err)
}
}
@ -249,6 +254,7 @@ func TestPrometheusReloadRules(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
name := "test"
@ -269,7 +275,8 @@ func TestPrometheusReloadRules(t *testing.T) {
t.Fatal(err)
}
if err := framework.CreatePrometheusAndWaitUntilReady(ns, framework.MakeBasicPrometheus(ns, name, name, 1)); err != nil {
p := framework.MakeBasicPrometheus(ns, name, name, 1)
if err := framework.CreatePrometheusAndWaitUntilReady(ns, p); err != nil {
t.Fatal(err)
}
@ -301,10 +308,11 @@ func TestPrometheusDiscovery(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
prometheusName := "test"
group := "servicediscovery-test"
svc := framework.MakeBasicPrometheusNodePortService(prometheusName, group, 30900)
svc := framework.MakePrometheusService(prometheusName, group, v1.ServiceTypeClusterIP)
s := framework.MakeBasicServiceMonitor(group)
if _, err := framework.MonClient.ServiceMonitors(ns).Create(s); err != nil {
@ -328,7 +336,7 @@ func TestPrometheusDiscovery(t *testing.T) {
t.Fatal("Generated Secret could not be retrieved: ", err)
}
err = wait.Poll(time.Second, 18*time.Minute, isDiscoveryWorking(ns, prometheusName))
err = wait.Poll(time.Second, 18*time.Minute, isDiscoveryWorking(ns, svc.Name, prometheusName))
if err != nil {
t.Fatal(errors.Wrap(err, "validating Prometheus target discovery failed"))
}
@ -338,12 +346,13 @@ func TestPrometheusAlertmanagerDiscovery(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
prometheusName := "test"
alertmanagerName := "test"
group := "servicediscovery-test"
svc := framework.MakeBasicPrometheusNodePortService(prometheusName, group, 30900)
amsvc := framework.MakeAlertmanagerNodePortService(alertmanagerName, group, 30903)
svc := framework.MakePrometheusService(prometheusName, group, v1.ServiceTypeClusterIP)
amsvc := framework.MakeAlertmanagerService(alertmanagerName, group, v1.ServiceTypeClusterIP)
p := framework.MakeBasicPrometheus(ns, prometheusName, group, 1)
framework.AddAlertingToPrometheus(p, ns, alertmanagerName)
@ -376,44 +385,19 @@ func TestPrometheusAlertmanagerDiscovery(t *testing.T) {
t.Fatal(errors.Wrap(err, "creating Alertmanager service failed"))
}
err = wait.Poll(time.Second, 18*time.Minute, isAlertmanagerDiscoveryWorking(ns, alertmanagerName))
err = wait.Poll(time.Second, 18*time.Minute, isAlertmanagerDiscoveryWorking(ns, svc.Name, alertmanagerName))
if err != nil {
t.Fatal(errors.Wrap(err, "validating Prometheus Alertmanager discovery failed"))
}
}
func TestExposingPrometheusWithNodePort(t *testing.T) {
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
basicPrometheus := framework.MakeBasicPrometheus(ns, "test", "test", 1)
service := framework.MakeBasicPrometheusNodePortService(basicPrometheus.Name, "nodeport-service", 30900)
if err := framework.CreatePrometheusAndWaitUntilReady(ns, basicPrometheus); err != nil {
t.Fatal("Creating prometheus failed: ", err)
}
if finalizerFn, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, service); err != nil {
t.Fatal("Creating prometheus service failed: ", err)
} else {
ctx.AddFinalizerFn(finalizerFn)
}
resp, err := http.Get(fmt.Sprintf("http://%s:30900/metrics", framework.ClusterIP))
if err != nil {
t.Fatal("Retrieving prometheus metrics failed with error: ", err)
} else if resp.StatusCode != 200 {
t.Fatal("Retrieving prometheus metrics failed with http status code: ", resp.StatusCode)
}
}
func TestExposingPrometheusWithKubernetesAPI(t *testing.T) {
t.Parallel()
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
basicPrometheus := framework.MakeBasicPrometheus(ns, "basic-prometheus", "test-group", 1)
service := framework.MakePrometheusService(basicPrometheus.Name, "test-group", v1.ServiceTypeClusterIP)
@ -434,47 +418,65 @@ func TestExposingPrometheusWithKubernetesAPI(t *testing.T) {
}
}
func TestExposingPrometheusWithIngress(t *testing.T) {
func TestPrometheusDiscoverTargetPort(t *testing.T) {
t.Parallel()
ctx := framework.NewTestCtx(t)
defer ctx.Cleanup(t)
ns := ctx.CreateNamespace(t, framework.KubeClient)
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
prometheus := framework.MakeBasicPrometheus(ns, "main", "test-group", 1)
prometheusService := framework.MakePrometheusService(prometheus.Name, "test-group", v1.ServiceTypeClusterIP)
ingress := testFramework.MakeBasicIngress(prometheusService.Name, 9090)
prometheusName := "test"
group := "servicediscovery-test"
svc := framework.MakePrometheusService(prometheusName, group, v1.ServiceTypeClusterIP)
err := testFramework.SetupNginxIngressControllerIncDefaultBackend(framework.KubeClient, ns)
if err != nil {
if _, err := framework.MonClient.ServiceMonitors(ns).Create(&v1alpha1.ServiceMonitor{
ObjectMeta: metav1.ObjectMeta{
Name: prometheusName,
Labels: map[string]string{
"group": group,
},
},
Spec: v1alpha1.ServiceMonitorSpec{
Selector: metav1.LabelSelector{
MatchLabels: map[string]string{
"group": group,
},
},
Endpoints: []v1alpha1.Endpoint{
v1alpha1.Endpoint{
TargetPort: intstr.FromInt(9090),
Interval: "30s",
},
},
},
}); err != nil {
t.Fatal("Creating ServiceMonitor failed: ", err)
}
p := framework.MakeBasicPrometheus(ns, prometheusName, group, 1)
if err := framework.CreatePrometheusAndWaitUntilReady(ns, p); err != nil {
t.Fatal(err)
}
err = framework.CreatePrometheusAndWaitUntilReady(ns, prometheus)
if err != nil {
t.Fatal(err)
if finalizerFn, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, svc); err != nil {
t.Fatal(errors.Wrap(err, "creating prometheus service failed"))
} else {
ctx.AddFinalizerFn(finalizerFn)
}
if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, prometheusService); err != nil {
t.Fatal(err)
_, err := framework.KubeClient.CoreV1().Secrets(ns).Get(fmt.Sprintf("prometheus-%s", prometheusName), metav1.GetOptions{})
if err != nil {
t.Fatal("Generated Secret could not be retrieved: ", err)
}
err = testFramework.CreateIngress(framework.KubeClient, ns, ingress)
err = wait.Poll(time.Second, 3*time.Minute, isDiscoveryWorking(ns, svc.Name, prometheusName))
if err != nil {
t.Fatal(err)
}
ip, err := testFramework.GetIngressIP(framework.KubeClient, ns, ingress.Name)
if err != nil {
t.Fatal(err)
}
err = testFramework.WaitForHTTPSuccessStatusCode(time.Minute, fmt.Sprintf("http://%s:/metrics", *ip))
if err != nil {
t.Fatal(err)
t.Fatal(errors.Wrap(err, "validating Prometheus target discovery failed"))
}
}
func isDiscoveryWorking(ns, prometheusName string) func() (bool, error) {
func isDiscoveryWorking(ns, svcName, prometheusName string) func() (bool, error) {
return func() (bool, error) {
pods, err := framework.KubeClient.CoreV1().Pods(ns).List(prometheus.ListOptions(prometheusName))
if err != nil {
@ -486,7 +488,7 @@ func isDiscoveryWorking(ns, prometheusName string) func() (bool, error) {
podIP := pods.Items[0].Status.PodIP
expectedTargets := []string{fmt.Sprintf("http://%s:9090/metrics", podIP)}
activeTargets, err := framework.GetActiveTargets()
activeTargets, err := framework.GetActiveTargets(ns, svcName)
if err != nil {
return false, err
}
@ -495,7 +497,7 @@ func isDiscoveryWorking(ns, prometheusName string) func() (bool, error) {
return false, nil
}
working, err := basicQueryWorking()
working, err := basicQueryWorking(ns, svcName)
if err != nil {
return false, err
}
@ -522,15 +524,14 @@ type prometheusQueryAPIResponse struct {
Data *queryResult `json:"data"`
}
func basicQueryWorking() (bool, error) {
resp, err := http.Get(fmt.Sprintf("http://%s:30900/api/v1/query?query=up", framework.ClusterIP))
func basicQueryWorking(ns, svcName string) (bool, error) {
response, err := framework.QueryPrometheusSVC(ns, svcName, "/api/v1/query", map[string]string{"query": "up"})
if err != nil {
return false, err
}
defer resp.Body.Close()
rq := prometheusQueryAPIResponse{}
if err := json.NewDecoder(resp.Body).Decode(&rq); err != nil {
if err := json.NewDecoder(response).Decode(&rq); err != nil {
return false, err
}
@ -542,7 +543,7 @@ func basicQueryWorking() (bool, error) {
return true, nil
}
func isAlertmanagerDiscoveryWorking(ns, alertmanagerName string) func() (bool, error) {
func isAlertmanagerDiscoveryWorking(ns, promSVCName, alertmanagerName string) func() (bool, error) {
return func() (bool, error) {
pods, err := framework.KubeClient.CoreV1().Pods(ns).List(alertmanager.ListOptions(alertmanagerName))
if err != nil {
@ -556,14 +557,13 @@ func isAlertmanagerDiscoveryWorking(ns, alertmanagerName string) func() (bool, e
expectedAlertmanagerTargets = append(expectedAlertmanagerTargets, fmt.Sprintf("http://%s:9093/api/v1/alerts", p.Status.PodIP))
}
resp, err := http.Get(fmt.Sprintf("http://%s:30900/api/v1/alertmanagers", framework.ClusterIP))
response, err := framework.QueryPrometheusSVC(ns, promSVCName, "/api/v1/alertmanagers", map[string]string{})
if err != nil {
return false, err
}
defer resp.Body.Close()
ra := prometheusAlertmanagerAPIResponse{}
if err := json.NewDecoder(resp.Body).Decode(&ra); err != nil {
if err := json.NewDecoder(response).Decode(&ra); err != nil {
return false, err
}

20
vendor/github.com/cenkalti/backoff/LICENSE generated vendored Normal file
View file

@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2014 Cenk Altı
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

30
vendor/github.com/cenkalti/backoff/README.md generated vendored Normal file
View file

@ -0,0 +1,30 @@
# Exponential Backoff [![GoDoc][godoc image]][godoc] [![Build Status][travis image]][travis] [![Coverage Status][coveralls image]][coveralls]
This is a Go port of the exponential backoff algorithm from [Google's HTTP Client Library for Java][google-http-java-client].
[Exponential backoff][exponential backoff wiki]
is an algorithm that uses feedback to multiplicatively decrease the rate of some process,
in order to gradually find an acceptable rate.
The retries exponentially increase and stop increasing when a certain threshold is met.
## Usage
See https://godoc.org/github.com/cenkalti/backoff#pkg-examples
## Contributing
* I would like to keep this library as small as possible.
* Please don't send a PR without opening an issue and discussing it first.
* If the proposed change is not a common use case, I will probably not accept it.
[godoc]: https://godoc.org/github.com/cenkalti/backoff
[godoc image]: https://godoc.org/github.com/cenkalti/backoff?status.png
[travis]: https://travis-ci.org/cenkalti/backoff
[travis image]: https://travis-ci.org/cenkalti/backoff.png?branch=master
[coveralls]: https://coveralls.io/github/cenkalti/backoff?branch=master
[coveralls image]: https://coveralls.io/repos/github/cenkalti/backoff/badge.svg?branch=master
[google-http-java-client]: https://github.com/google/google-http-java-client
[exponential backoff wiki]: http://en.wikipedia.org/wiki/Exponential_backoff
[advanced example]: https://godoc.org/github.com/cenkalti/backoff#example_

66
vendor/github.com/cenkalti/backoff/backoff.go generated vendored Normal file
View file

@ -0,0 +1,66 @@
// Package backoff implements backoff algorithms for retrying operations.
//
// Use Retry function for retrying operations that may fail.
// If Retry does not meet your needs,
// copy/paste the function into your project and modify as you wish.
//
// There is also Ticker type similar to time.Ticker.
// You can use it if you need to work with channels.
//
// See Examples section below for usage examples.
package backoff
import "time"
// BackOff is a backoff policy for retrying an operation.
type BackOff interface {
// NextBackOff returns the duration to wait before retrying the operation,
// or backoff.Stop to indicate that no more retries should be made.
//
// Example usage:
//
// duration := backoff.NextBackOff();
// if (duration == backoff.Stop) {
// // Do not retry operation.
// } else {
// // Sleep for duration and retry operation.
// }
//
NextBackOff() time.Duration
// Reset to initial state.
Reset()
}
// Stop indicates that no more retries should be made for use in NextBackOff().
const Stop time.Duration = -1
// ZeroBackOff is a fixed backoff policy whose backoff time is always zero,
// meaning that the operation is retried immediately without waiting, indefinitely.
type ZeroBackOff struct{}
func (b *ZeroBackOff) Reset() {}
func (b *ZeroBackOff) NextBackOff() time.Duration { return 0 }
// StopBackOff is a fixed backoff policy that always returns backoff.Stop for
// NextBackOff(), meaning that the operation should never be retried.
type StopBackOff struct{}
func (b *StopBackOff) Reset() {}
func (b *StopBackOff) NextBackOff() time.Duration { return Stop }
// ConstantBackOff is a backoff policy that always returns the same backoff delay.
// This is in contrast to an exponential backoff policy,
// which returns a delay that grows longer as you call NextBackOff() over and over again.
type ConstantBackOff struct {
Interval time.Duration
}
func (b *ConstantBackOff) Reset() {}
func (b *ConstantBackOff) NextBackOff() time.Duration { return b.Interval }
func NewConstantBackOff(d time.Duration) *ConstantBackOff {
return &ConstantBackOff{Interval: d}
}
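// Example sketch (illustrative only, not from the vendored source): driving an
// operation by hand against the BackOff interface, mirroring the pseudo-code in
// the interface documentation above. op and sleep are assumed placeholders.
func exampleManualRetry(b BackOff, op func() error, sleep func(time.Duration)) error {
	b.Reset()
	for {
		err := op()
		if err == nil {
			return nil
		}
		d := b.NextBackOff()
		if d == Stop {
			return err // the policy says to give up
		}
		sleep(d) // wait for the suggested duration, then try again
	}
}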

60
vendor/github.com/cenkalti/backoff/context.go generated vendored Normal file
View file

@ -0,0 +1,60 @@
package backoff
import (
"time"
"golang.org/x/net/context"
)
// BackOffContext is a backoff policy that stops retrying after the context
// is canceled.
type BackOffContext interface {
BackOff
Context() context.Context
}
type backOffContext struct {
BackOff
ctx context.Context
}
// WithContext returns a BackOffContext with context ctx
//
// ctx must not be nil
func WithContext(b BackOff, ctx context.Context) BackOffContext {
if ctx == nil {
panic("nil context")
}
if b, ok := b.(*backOffContext); ok {
return &backOffContext{
BackOff: b.BackOff,
ctx: ctx,
}
}
return &backOffContext{
BackOff: b,
ctx: ctx,
}
}
func ensureContext(b BackOff) BackOffContext {
if cb, ok := b.(BackOffContext); ok {
return cb
}
return WithContext(b, context.Background())
}
func (b *backOffContext) Context() context.Context {
return b.ctx
}
func (b *backOffContext) NextBackOff() time.Duration {
select {
case <-b.Context().Done():
return Stop
default:
return b.BackOff.NextBackOff()
}
}

151
vendor/github.com/cenkalti/backoff/exponential.go generated vendored Normal file
View file

@ -0,0 +1,151 @@
package backoff
import (
"math/rand"
"time"
)
/*
ExponentialBackOff is a backoff implementation that increases the backoff
period for each retry attempt using a randomization function that grows exponentially.
NextBackOff() is calculated using the following formula:
randomized interval =
RetryInterval * (random value in range [1 - RandomizationFactor, 1 + RandomizationFactor])
In other words NextBackOff() will range between the randomization factor
percentage below and above the retry interval.
For example, given the following parameters:
RetryInterval = 2
RandomizationFactor = 0.5
Multiplier = 2
the actual backoff period used in the next retry attempt will range between 1 and 3 seconds,
multiplied by the exponential, that is, between 2 and 6 seconds.
Note: MaxInterval caps the RetryInterval and not the randomized interval.
If the time elapsed since an ExponentialBackOff instance is created goes past the
MaxElapsedTime, then the method NextBackOff() starts returning backoff.Stop.
The elapsed time can be reset by calling Reset().
Example: Given the following default arguments, for 10 tries the sequence will be,
and assuming we go over the MaxElapsedTime on the 10th try:
 Request #  RetryInterval (seconds)  Randomized Interval (seconds)
  1          0.5                     [0.25,   0.75]
  2          0.75                    [0.375,  1.125]
  3          1.125                   [0.562,  1.687]
  4          1.687                   [0.8435, 2.53]
  5          2.53                    [1.265,  3.795]
  6          3.795                   [1.897,  5.692]
  7          5.692                   [2.846,  8.538]
  8          8.538                   [4.269, 12.807]
  9          12.807                  [6.403, 19.210]
 10          19.210                  backoff.Stop
Note: Implementation is not thread-safe.
*/
type ExponentialBackOff struct {
InitialInterval time.Duration
RandomizationFactor float64
Multiplier float64
MaxInterval time.Duration
// After MaxElapsedTime the ExponentialBackOff stops.
// It never stops if MaxElapsedTime == 0.
MaxElapsedTime time.Duration
Clock Clock
currentInterval time.Duration
startTime time.Time
}
// Clock is an interface that returns current time for BackOff.
type Clock interface {
Now() time.Time
}
// Default values for ExponentialBackOff.
const (
DefaultInitialInterval = 500 * time.Millisecond
DefaultRandomizationFactor = 0.5
DefaultMultiplier = 1.5
DefaultMaxInterval = 60 * time.Second
DefaultMaxElapsedTime = 15 * time.Minute
)
// NewExponentialBackOff creates an instance of ExponentialBackOff using default values.
func NewExponentialBackOff() *ExponentialBackOff {
b := &ExponentialBackOff{
InitialInterval: DefaultInitialInterval,
RandomizationFactor: DefaultRandomizationFactor,
Multiplier: DefaultMultiplier,
MaxInterval: DefaultMaxInterval,
MaxElapsedTime: DefaultMaxElapsedTime,
Clock: SystemClock,
}
b.Reset()
return b
}
type systemClock struct{}
func (t systemClock) Now() time.Time {
return time.Now()
}
// SystemClock implements Clock interface that uses time.Now().
var SystemClock = systemClock{}
// Reset the interval back to the initial retry interval and restarts the timer.
func (b *ExponentialBackOff) Reset() {
b.currentInterval = b.InitialInterval
b.startTime = b.Clock.Now()
}
// NextBackOff calculates the next backoff interval using the formula:
// Randomized interval = RetryInterval +/- (RandomizationFactor * RetryInterval)
func (b *ExponentialBackOff) NextBackOff() time.Duration {
// Make sure we have not gone over the maximum elapsed time.
if b.MaxElapsedTime != 0 && b.GetElapsedTime() > b.MaxElapsedTime {
return Stop
}
defer b.incrementCurrentInterval()
return getRandomValueFromInterval(b.RandomizationFactor, rand.Float64(), b.currentInterval)
}
// GetElapsedTime returns the elapsed time since an ExponentialBackOff instance
// is created and is reset when Reset() is called.
//
// The elapsed time is computed using time.Now().UnixNano().
func (b *ExponentialBackOff) GetElapsedTime() time.Duration {
return b.Clock.Now().Sub(b.startTime)
}
// Increments the current interval by multiplying it with the multiplier.
func (b *ExponentialBackOff) incrementCurrentInterval() {
// Check for overflow, if overflow is detected set the current interval to the max interval.
if float64(b.currentInterval) >= float64(b.MaxInterval)/b.Multiplier {
b.currentInterval = b.MaxInterval
} else {
b.currentInterval = time.Duration(float64(b.currentInterval) * b.Multiplier)
}
}
// Returns a random value from the following interval:
// [currentInterval - randomizationFactor*currentInterval, currentInterval + randomizationFactor*currentInterval].
func getRandomValueFromInterval(randomizationFactor, random float64, currentInterval time.Duration) time.Duration {
var delta = randomizationFactor * float64(currentInterval)
var minInterval = float64(currentInterval) - delta
var maxInterval = float64(currentInterval) + delta
// Get a random value from the range [minInterval, maxInterval].
// The formula used below has a +1 because if the minInterval is 1 and the maxInterval is 3 then
// we want a 33% chance for selecting either 1, 2 or 3.
return time.Duration(minInterval + (random * (maxInterval - minInterval + 1)))
}
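// Worked sketch (illustrative only): with the defaults above
// (InitialInterval = 500ms, RandomizationFactor = 0.5, Multiplier = 1.5),
// the first NextBackOff() lands in [250ms, 750ms], the second in
// [375ms, 1.125s], matching the table in the ExponentialBackOff docs.
func exampleDefaultIntervals() []time.Duration {
	b := NewExponentialBackOff()
	out := make([]time.Duration, 0, 3)
	for i := 0; i < 3; i++ {
		out = append(out, b.NextBackOff()) // randomized, grows by ~1.5x per call
	}
	return out
}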

78
vendor/github.com/cenkalti/backoff/retry.go generated vendored Normal file
View file

@ -0,0 +1,78 @@
package backoff
import "time"
// An Operation is executed by Retry() or RetryNotify().
// The operation will be retried using a backoff policy if it returns an error.
type Operation func() error
// Notify is a notify-on-error function. It receives an operation error and
// backoff delay if the operation failed (with an error).
//
// NOTE that if the backoff policy signals to stop retrying,
// the notify function isn't called.
type Notify func(error, time.Duration)
// Retry the operation o until it does not return error or BackOff stops.
// o is guaranteed to be run at least once.
// It is the caller's responsibility to reset b after Retry returns.
//
// If o returns a *PermanentError, the operation is not retried, and the
// wrapped error is returned.
//
// Retry sleeps the goroutine for the duration returned by BackOff after a
// failed operation returns.
func Retry(o Operation, b BackOff) error { return RetryNotify(o, b, nil) }
// RetryNotify calls notify function with the error and wait duration
// for each failed attempt before sleep.
func RetryNotify(operation Operation, b BackOff, notify Notify) error {
var err error
var next time.Duration
cb := ensureContext(b)
b.Reset()
for {
if err = operation(); err == nil {
return nil
}
if permanent, ok := err.(*PermanentError); ok {
return permanent.Err
}
if next = b.NextBackOff(); next == Stop {
return err
}
if notify != nil {
notify(err, next)
}
t := time.NewTimer(next)
select {
case <-cb.Context().Done():
t.Stop()
return err
case <-t.C:
}
}
}
// PermanentError signals that the operation should not be retried.
type PermanentError struct {
Err error
}
func (e *PermanentError) Error() string {
return e.Err.Error()
}
// Permanent wraps the given err in a *PermanentError.
func Permanent(err error) *PermanentError {
return &PermanentError{
Err: err,
}
}
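// Usage sketch (illustrative only): retrying a flaky operation with the default
// exponential policy and observing each failed attempt via Notify. fetch is an
// assumed placeholder for the operation being retried.
func exampleRetryNotify(fetch func() error) error {
	notify := func(err error, wait time.Duration) {
		// a real caller might log: "retrying in <wait> after error: <err>"
	}
	return RetryNotify(fetch, NewExponentialBackOff(), notify)
}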

81
vendor/github.com/cenkalti/backoff/ticker.go generated vendored Normal file
View file

@ -0,0 +1,81 @@
package backoff
import (
"runtime"
"sync"
"time"
)
// Ticker holds a channel that delivers `ticks' of a clock at times reported by a BackOff.
//
// Ticks will continue to arrive when the previous operation is still running,
// so operations that take a while to fail could run in quick succession.
type Ticker struct {
C <-chan time.Time
c chan time.Time
b BackOffContext
stop chan struct{}
stopOnce sync.Once
}
// NewTicker returns a new Ticker containing a channel that will send the time at times
// specified by the BackOff argument. Ticker is guaranteed to tick at least once.
// The channel is closed when Stop method is called or BackOff stops.
func NewTicker(b BackOff) *Ticker {
c := make(chan time.Time)
t := &Ticker{
C: c,
c: c,
b: ensureContext(b),
stop: make(chan struct{}),
}
go t.run()
runtime.SetFinalizer(t, (*Ticker).Stop)
return t
}
// Stop turns off a ticker. After Stop, no more ticks will be sent.
func (t *Ticker) Stop() {
t.stopOnce.Do(func() { close(t.stop) })
}
func (t *Ticker) run() {
c := t.c
defer close(c)
t.b.Reset()
// Ticker is guaranteed to tick at least once.
afterC := t.send(time.Now())
for {
if afterC == nil {
return
}
select {
case tick := <-afterC:
afterC = t.send(tick)
case <-t.stop:
t.c = nil // Prevent future ticks from being sent to the channel.
return
case <-t.b.Context().Done():
return
}
}
}
func (t *Ticker) send(tick time.Time) <-chan time.Time {
select {
case t.c <- tick:
case <-t.stop:
return nil
}
next := t.b.NextBackOff()
if next == Stop {
t.Stop()
return nil
}
return time.After(next)
}
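// Usage sketch (illustrative only): consuming the ticker from a channel-based
// retry loop instead of calling Retry(). op is an assumed placeholder.
func exampleTickerRetry(op func() error) error {
	ticker := NewTicker(NewExponentialBackOff())
	defer ticker.Stop()

	var err error
	for range ticker.C {
		if err = op(); err == nil {
			return nil
		}
		// when the policy returns Stop, the channel is closed and the loop ends
	}
	return err
}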

6
vendor/vendor.json vendored
View file

@ -20,6 +20,12 @@
"revision": "4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9",
"revisionTime": "2016-08-04T10:47:26Z"
},
{
"checksumSHA1": "NfQBkfSVHEmnR2OaORxdOXmhLcs=",
"path": "github.com/cenkalti/backoff",
"revision": "5d150e7eec023ce7a124856b37c68e54b4050ac7",
"revisionTime": "2017-03-29T03:22:34Z"
},
{
"checksumSHA1": "jyYz6OqzmWw6CAfSc8WwjDB1S3k=",
"path": "github.com/emicklei/go-restful",