1
0
Fork 0
mirror of https://github.com/prometheus-operator/prometheus-operator.git synced 2025-04-21 03:38:43 +00:00

Merge branch 'master' into make-folder

This commit is contained in:
Frederic Branczyk 2018-06-27 11:34:06 +02:00 committed by GitHub
commit 2741a7cb09
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
39 changed files with 1188 additions and 183 deletions

View file

@ -26,6 +26,7 @@ This Document documents the types introduced by the Prometheus Operator to be co
* [PrometheusRuleSpec](#prometheusrulespec)
* [PrometheusSpec](#prometheusspec)
* [PrometheusStatus](#prometheusstatus)
* [QueueConfig](#queueconfig)
* [RelabelConfig](#relabelconfig)
* [RemoteReadSpec](#remotereadspec)
* [RemoteWriteSpec](#remotewritespec)
@ -36,6 +37,9 @@ This Document documents the types introduced by the Prometheus Operator to be co
* [ServiceMonitorSpec](#servicemonitorspec)
* [StorageSpec](#storagespec)
* [TLSConfig](#tlsconfig)
* [ThanosGCSSpec](#thanosgcsspec)
* [ThanosS3Spec](#thanoss3spec)
* [ThanosSpec](#thanosspec)
## AlertingSpec
@ -265,6 +269,7 @@ Specification of the desired behavior of the Prometheus cluster. More info: http
| containers | Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to a Prometheus pod. | []v1.Container | false |
| additionalScrapeConfigs | AdditionalScrapeConfigs allows specifying a key of a Secret containing additional Prometheus scrape configurations. Scrape configurations specified are appended to the configurations generated by the Prometheus Operator. Job configurations specified must have the form as specified in the official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#<scrape_config>. As scrape configs are appended, the user is responsible to make sure it is valid. Note that using this feature may expose the possibility to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible scrape configs are going to break Prometheus after the upgrade. | *[v1.SecretKeySelector](https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#secretkeyselector-v1-core) | false |
| additionalAlertManagerConfigs | AdditionalAlertManagerConfigs allows specifying a key of a Secret containing additional Prometheus AlertManager configurations. AlertManager configurations specified are appended to the configurations generated by the Prometheus Operator. Job configurations specified must have the form as specified in the official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#<alertmanager_config>. As AlertManager configs are appended, the user is responsible to make sure it is valid. Note that using this feature may expose the possibility to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible AlertManager configs are going to break Prometheus after the upgrade. | *[v1.SecretKeySelector](https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#secretkeyselector-v1-core) | false |
| thanos | Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment.\n\nThis section is experimental, it may change significantly without deprecation notice in any release.\n\nThis is experimental and may change significantly without backward compatibility in any release. | *[ThanosSpec](#thanosspec) | false |
[Back to TOC](#table-of-contents)
@ -282,6 +287,22 @@ Most recent observed status of the Prometheus cluster. Read-only. Not included w
[Back to TOC](#table-of-contents)
## QueueConfig
QueueConfig allows the tuning of remote_write queue_config parameters. This object is referenced in the RemoteWriteSpec object.
| Field | Description | Scheme | Required |
| ----- | ----------- | ------ | -------- |
| capacity | Capacity is the number of samples to buffer per shard before we start dropping them. | int | false |
| maxShards | MaxShards is the maximum number of shards, i.e. amount of concurrency. | int | false |
| maxSamplesPerSend | MaxSamplesPerSend is the maximum number of samples per send. | int | false |
| batchSendDeadline | BatchSendDeadline is the maximum time a sample will wait in buffer. | string | false |
| maxRetries | MaxRetries is the maximum number of times to retry a batch on recoverable errors. | int | false |
| minBackoff | MinBackoff is the initial retry delay. Gets doubled for every retry. | string | false |
| maxBackoff | MaxBackoff is the maximum retry delay. | string | false |
[Back to TOC](#table-of-contents)
## RelabelConfig
RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs
@ -330,6 +351,7 @@ RemoteWriteSpec defines the remote_write configuration for prometheus.
| bearerTokenFile | File to read bearer token for remote write. | string | false |
| tlsConfig | TLS Config to use for remote write. | *[TLSConfig](#tlsconfig) | false |
| proxyUrl | Optional ProxyURL | string | false |
| queueConfig | QueueConfig allows tuning of the remote write queue parameters. | *[QueueConfig](#queueconfig) | false |
[Back to TOC](#table-of-contents)
@ -423,3 +445,42 @@ TLSConfig specifies TLS configuration parameters.
| insecureSkipVerify | Disable target certificate validation. | bool | false |
[Back to TOC](#table-of-contents)
## ThanosGCSSpec
ThanosGCSSpec defines parameters for use of Google Cloud Storage (GCS) with Thanos.
| Field | Description | Scheme | Required |
| ----- | ----------- | ------ | -------- |
| bucket | Google Cloud Storage bucket name for stored blocks. If empty it won't store any block inside Google Cloud Storage. | *string | false |
[Back to TOC](#table-of-contents)
## ThanosS3Spec
ThanosS3Spec defines parameters for use of AWS Simple Storage Service (S3) with Thanos. (S3 compatible services apply as well)
| Field | Description | Scheme | Required |
| ----- | ----------- | ------ | -------- |
| bucket | S3-Compatible API bucket name for stored blocks. | *string | false |
| endpoint | S3-Compatible API endpoint for stored blocks. | *string | false |
| accessKey | AccessKey for an S3-Compatible API. | *[v1.SecretKeySelector](https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#secretkeyselector-v1-core) | false |
| secretKey | SecretKey for an S3-Compatible API. | *[v1.SecretKeySelector](https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#secretkeyselector-v1-core) | false |
| insecure | Whether to use an insecure connection with an S3-Compatible API. | *bool | false |
| signatureVersion2 | Whether to use S3 Signature Version 2; otherwise Signature Version 4 will be used. | *bool | false |
[Back to TOC](#table-of-contents)
## ThanosSpec
ThanosSpec defines parameters for a Prometheus server within a Thanos deployment.
| Field | Description | Scheme | Required |
| ----- | ----------- | ------ | -------- |
| peers | Peers is a DNS name for Thanos to discover peers through. | *string | false |
| version | Version describes the version of Thanos to use. | *string | false |
| baseImage | Thanos base image if other than default. | *string | false |
| gcs | GCS configures use of GCS in Thanos. | *[ThanosGCSSpec](#thanosgcsspec) | true |
| s3 | S3 configures use of S3 in Thanos. | *[ThanosS3Spec](#thanoss3spec) | true |
[Back to TOC](#table-of-contents)

View file

@ -181,7 +181,7 @@ spec:
- args:
- --secure-listen-address=:9100
- --upstream=http://127.0.0.1:9101/
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
name: kube-rbac-proxy
ports:
- containerPort: 9100
@ -272,7 +272,7 @@ spec:
- args:
- --secure-listen-address=:8443
- --upstream=http://127.0.0.1:8081/
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
name: kube-rbac-proxy-main
ports:
- containerPort: 8443
@ -287,7 +287,7 @@ spec:
- args:
- --secure-listen-address=:9443
- --upstream=http://127.0.0.1:8082/
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
name: kube-rbac-proxy-self
ports:
- containerPort: 9443
@ -304,7 +304,7 @@ spec:
- --port=8081
- --telemetry-host=127.0.0.1
- --telemetry-port=8082
image: quay.io/coreos/kube-state-metrics:v1.3.0
image: quay.io/coreos/kube-state-metrics:v1.3.1
name: kube-state-metrics
resources:
limits:
@ -411,7 +411,7 @@ spec:
matchExpressions:
- key: k8s-app
operator: Exists
version: v2.2.1
version: v2.3.1
```
> Make sure that the `ServiceAccount` called `prometheus-k8s` exists and if using RBAC, is bound to the correct role. Read more on [RBAC when using the Prometheus Operator](../rbac.md).
@ -595,7 +595,7 @@ spec:
beta.kubernetes.io/os: linux
replicas: 3
serviceAccountName: alertmanager-main
version: v0.14.0
version: v0.15.0
```
Read more in the [alerting guide](alerting.md) on how to configure the Alertmanager as it will not spin up unless it has a valid configuration mounted through a `Secret`. Note that the `Secret` has to be in the same namespace as the `Alertmanager` resource as well as have the name `alertmanager-<name-of-alertmanager-object>` and the key of the configuration is `alertmanager.yaml`.

View file

@ -1,6 +1,7 @@
SHELL=/bin/bash -o pipefail
REPO?=quay.io/coreos/prometheus-operator
REPO_PROMETHEUS_CONFIG_RELOADER?=quay.io/coreos/prometheus-config-reloader
TAG?=$(shell git rev-parse --short HEAD)
PO_CRDGEN_BINARY:=$(GOPATH)/bin/po-crdgen
@ -36,7 +37,7 @@ prometheus-config-reloader:
-ldflags "-X github.com/coreos/prometheus-operator/pkg/version.Version=$(shell cat VERSION)" \
-o $@ cmd/$@/main.go
pkg/client/monitoring/v1/zz_generated.deepcopy.go: $(DEEPCOPY_GEN_BINARY)
pkg/client/monitoring/v1/zz_generated.deepcopy.go: .header pkg/client/monitoring/v1/types.go $(DEEPCOPY_GEN_BINARY)
$(DEEPCOPY_GEN_BINARY) \
-i github.com/coreos/prometheus-operator/pkg/client/monitoring/v1 \
--go-header-file="$(GOPATH)/src/github.com/coreos/prometheus-operator/.header" \
@ -68,7 +69,7 @@ hack/prometheus-config-reloader-image: cmd/prometheus-config-reloader/Dockerfile
# Create empty target file, for the sole purpose of recording when this target
# was last executed via the last-modification timestamp on the file. See
# https://www.gnu.org/software/make/manual/make.html#Empty-Targets
docker build -t quay.io/coreos/prometheus-config-reloader:$(TAG) -f cmd/prometheus-config-reloader/Dockerfile .
docker build -t $(REPO_PROMETHEUS_CONFIG_RELOADER):$(TAG) -f cmd/prometheus-config-reloader/Dockerfile .
touch $@
@ -77,7 +78,7 @@ hack/prometheus-config-reloader-image: cmd/prometheus-config-reloader/Dockerfile
##############
.PHONY: generate
generate: Documentation/*
generate: pkg/client/monitoring/v1/zz_generated.deepcopy.go pkg/client/monitoring/v1/openapi_generated.go kube-prometheus Documentation/*
.PHONY: generate-in-docker
generate-in-docker: hack/jsonnet-docker-image

View file

@ -78,6 +78,7 @@ func init() {
flagset.StringVar(&cfg.ConfigReloaderImage, "config-reloader-image", "quay.io/coreos/configmap-reload:v0.0.1", "Reload Image")
flagset.StringVar(&cfg.AlertmanagerDefaultBaseImage, "alertmanager-default-base-image", "quay.io/prometheus/alertmanager", "Alertmanager default base image")
flagset.StringVar(&cfg.PrometheusDefaultBaseImage, "prometheus-default-base-image", "quay.io/prometheus/prometheus", "Prometheus default base image")
flagset.StringVar(&cfg.ThanosDefaultBaseImage, "thanos-default-base-image", "improbable/thanos", "Thanos default base image")
flagset.StringVar(&cfg.Namespace, "namespace", v1.NamespaceAll, "Namespace to scope the interaction of the Prometheus Operator and the apiserver.")
flagset.Var(&cfg.Labels, "labels", "Labels to be add to all resources created by the operator")
flagset.StringVar(&cfg.CrdGroup, "crd-apigroup", monitoringv1.Group, "prometheus CRD API group name")

View file

@ -1,73 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
name: self
labels:
prometheus: self
spec:
podMetadata:
labels:
thanos-peer: 'true'
replicas: 2
version: v2.2.1
serviceAccountName: prometheus-k8s
serviceMonitorSelector:
matchLabels:
app: prometheus
ruleSelector:
matchLabels:
role: prometheus-rulefiles
prometheus: k8s
resources:
requests:
# 2Gi is default, but won't schedule if you don't have a node with >2Gi
# memory. Modify based on your target and time-series count for
# production use. This value is mainly meant for demonstration/testing
# purposes.
memory: 400Mi
containers:
- name: thanos
image: improbable/thanos:latest
args:
- "sidecar"
- "--log.level=debug"
- "--cluster.peers=thanos-peers.default.svc:10900"
ports:
- name: http
containerPort: 10902
- name: grpc
containerPort: 10901
- name: cluster
containerPort: 10900
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: prometheus
labels:
app: prometheus
spec:
selector:
matchLabels:
app: prometheus
endpoints:
- port: web
interval: 30s
---
apiVersion: v1
kind: Service
metadata:
labels:
app: prometheus
prometheus: self
name: prometheus-self
spec:
type: NodePort
ports:
- name: web
nodePort: 30900
port: 9090
protocol: TCP
targetPort: web
selector:
prometheus: self

View file

@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
namespace: 'default',
versions+:: {
alertmanager: 'v0.14.0',
alertmanager: 'v0.15.0',
},
imageRepos+:: {

View file

@ -5,8 +5,8 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
namespace: 'default',
versions+:: {
kubeStateMetrics: 'v1.3.0',
kubeRbacProxy: 'v0.3.0',
kubeStateMetrics: 'v1.3.1',
kubeRbacProxy: 'v0.3.1',
addonResizer: '1.0',
},

View file

@ -6,7 +6,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
versions+:: {
nodeExporter: 'v0.15.2',
kubeRbacProxy: 'v0.3.0',
kubeRbacProxy: 'v0.3.1',
},
imageRepos+:: {

View file

@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
namespace: 'default',
versions+:: {
prometheus: 'v2.2.1',
prometheus: 'v2.3.1',
},
imageRepos+:: {

View file

@ -2672,6 +2672,77 @@ spec:
phase:
description: Phase represents the current phase of PersistentVolumeClaim.
type: string
thanos:
description: ThanosSpec defines parameters for a Prometheus server within
a Thanos deployment.
properties:
baseImage:
description: Thanos base image if other than default.
type: string
gcs:
description: ThanosGCSSpec defines parameters for use of Google
Cloud Storage (GCS) with Thanos.
properties:
bucket:
description: Google Cloud Storage bucket name for stored blocks.
If empty it won't store any block inside Google Cloud Storage.
type: string
peers:
description: Peers is a DNS name for Thanos to discover peers through.
type: string
s3:
description: ThanosS3Spec defines parameters for use of AWS Simple Storage
Service (S3) with Thanos. (S3 compatible services apply as well)
properties:
accessKey:
description: SecretKeySelector selects a key of a Secret.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
optional:
description: Specify whether the Secret or it's key must
be defined
type: boolean
required:
- key
bucket:
description: S3-Compatible API bucket name for stored blocks.
type: string
endpoint:
description: S3-Compatible API endpoint for stored blocks.
type: string
insecure:
description: Whether to use an insecure connection with an S3-Compatible
API.
type: boolean
secretKey:
description: SecretKeySelector selects a key of a Secret.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
optional:
description: Specify whether the Secret or it's key must
be defined
type: boolean
required:
- key
signatureVersion2:
description: Whether to use S3 Signature Version 2; otherwise
Signature Version 4 will be used.
type: boolean
version:
description: Version describes the version of Thanos to use.
type: string
tolerations:
description: If specified, the pod's tolerations.
items:

View file

@ -11,4 +11,4 @@ spec:
beta.kubernetes.io/os: linux
replicas: 3
serviceAccountName: alertmanager-main
version: v0.14.0
version: v0.15.0

View file

@ -64,7 +64,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@ -150,7 +150,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@ -248,7 +248,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@ -334,7 +334,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@ -432,7 +432,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@ -518,7 +518,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@ -616,7 +616,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@ -702,7 +702,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@ -800,7 +800,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@ -909,6 +909,7 @@ items:
},
"timezone": "utc",
"title": "K8s / USE Method / Cluster",
"uid": "a6e7d1362e1ddbb79db21d5bb40d7137",
"version": 0
}
kind: ConfigMap
@ -1851,6 +1852,7 @@ items:
},
"timezone": "utc",
"title": "K8s / USE Method / Node",
"uid": "4ac4f123aae0ff6dbaf4f4f66120033b",
"version": 0
}
kind: ConfigMap
@ -2468,7 +2470,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "/dashboard/file/k8s-resources-namespace.json?var-datasource=$datasource&var-namespace=$__cell",
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
@ -2828,7 +2830,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "/dashboard/file/k8s-resources-namespace.json?var-datasource=$datasource&var-namespace=$__cell",
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
@ -3000,6 +3002,7 @@ items:
},
"timezone": "utc",
"title": "K8s / Compute Resources / Cluster",
"uid": "efa86fd1d0c121a26444b636a3f509a8",
"version": 0
}
kind: ConfigMap
@ -3269,7 +3272,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "/dashboard/file/k8s-resources-pod.json?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@ -3629,7 +3632,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "/dashboard/file/k8s-resources-pod.json?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@ -3828,6 +3831,7 @@ items:
},
"timezone": "utc",
"title": "K8s / Compute Resources / Namespace",
"uid": "85a562078cdf77779eaa1add43ccec1e",
"version": 0
}
kind: ConfigMap
@ -4683,6 +4687,7 @@ items:
},
"timezone": "utc",
"title": "K8s / Compute Resources / Pod",
"uid": "6581e46e4e5c7ba40a07646395ef7b23",
"version": 0
}
kind: ConfigMap
@ -5609,6 +5614,7 @@ items:
},
"timezone": "browser",
"title": "Nodes",
"uid": "fa49a4706d07a042595b664c87fb33ea",
"version": 0
}
kind: ConfigMap
@ -6098,6 +6104,7 @@ items:
},
"timezone": "browser",
"title": "Pods",
"uid": "ab4f13a9892a76a4d21ce8c2445bf4ea",
"version": 0
}
kind: ConfigMap
@ -6950,6 +6957,7 @@ items:
},
"timezone": "browser",
"title": "StatefulSets",
"uid": "a31c1f46e6f727cb37c0d731a7245005",
"version": 0
}
kind: ConfigMap

View file

@ -19,7 +19,7 @@ spec:
- args:
- --secure-listen-address=:8443
- --upstream=http://127.0.0.1:8081/
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
name: kube-rbac-proxy-main
ports:
- containerPort: 8443
@ -34,7 +34,7 @@ spec:
- args:
- --secure-listen-address=:9443
- --upstream=http://127.0.0.1:8082/
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
name: kube-rbac-proxy-self
ports:
- containerPort: 9443
@ -51,7 +51,7 @@ spec:
- --port=8081
- --telemetry-host=127.0.0.1
- --telemetry-port=8082
image: quay.io/coreos/kube-state-metrics:v1.3.0
image: quay.io/coreos/kube-state-metrics:v1.3.1
name: kube-state-metrics
resources:
limits:

View file

@ -38,7 +38,7 @@ spec:
- args:
- --secure-listen-address=:9100
- --upstream=http://127.0.0.1:9101/
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
name: kube-rbac-proxy
ports:
- containerPort: 9100

View file

@ -27,4 +27,4 @@ spec:
matchExpressions:
- key: k8s-app
operator: Exists
version: v2.2.1
version: v2.3.1

View file

@ -202,21 +202,21 @@ spec:
)
record: node:node_memory_swap_io_bytes:sum_rate
- expr: |
avg(irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3)
avg(irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3)
record: :node_disk_utilisation:avg_irate
- expr: |
avg by (node) (
irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3
irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_disk_utilisation:avg_irate
- expr: |
avg(irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3)
avg(irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3)
record: :node_disk_saturation:avg_irate
- expr: |
avg by (node) (
irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3
irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
@ -268,6 +268,7 @@ spec:
- alert: AlertmanagerDown
annotations:
message: Alertmanager has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-alertmanagerdown
expr: |
absent(up{job="alertmanager-main"} == 1)
for: 15m
@ -276,6 +277,7 @@ spec:
- alert: KubeAPIDown
annotations:
message: KubeAPI has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
expr: |
absent(up{job="apiserver"} == 1)
for: 15m
@ -284,6 +286,7 @@ spec:
- alert: KubeControllerManagerDown
annotations:
message: KubeControllerManager has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
expr: |
absent(up{job="kube-controller-manager"} == 1)
for: 15m
@ -292,6 +295,7 @@ spec:
- alert: KubeSchedulerDown
annotations:
message: KubeScheduler has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
expr: |
absent(up{job="kube-scheduler"} == 1)
for: 15m
@ -300,6 +304,7 @@ spec:
- alert: KubeStateMetricsDown
annotations:
message: KubeStateMetrics has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricsdown
expr: |
absent(up{job="kube-state-metrics"} == 1)
for: 15m
@ -308,6 +313,7 @@ spec:
- alert: KubeletDown
annotations:
message: Kubelet has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
expr: |
absent(up{job="kubelet"} == 1)
for: 15m
@ -316,6 +322,7 @@ spec:
- alert: NodeExporterDown
annotations:
message: NodeExporter has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeexporterdown
expr: |
absent(up{job="node-exporter"} == 1)
for: 15m
@ -324,6 +331,7 @@ spec:
- alert: PrometheusDown
annotations:
message: Prometheus has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusdown
expr: |
absent(up{job="prometheus-k8s"} == 1)
for: 15m
@ -332,6 +340,7 @@ spec:
- alert: PrometheusOperatorDown
annotations:
message: PrometheusOperator has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatordown
expr: |
absent(up{job="prometheus-operator"} == 1)
for: 15m
@ -343,6 +352,7 @@ spec:
annotations:
message: '{{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
}}) is restarting {{ printf "%.2f" $value }} / second'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
expr: |
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) > 0
for: 1h
@ -351,6 +361,7 @@ spec:
- alert: KubePodNotReady
annotations:
message: '{{ $labels.namespace }}/{{ $labels.pod }} is not ready.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
expr: |
sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase!~"Running|Succeeded"}) > 0
for: 1h
@ -360,6 +371,7 @@ spec:
annotations:
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} generation
mismatch
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
expr: |
kube_deployment_status_observed_generation{job="kube-state-metrics"}
!=
@ -371,6 +383,7 @@ spec:
annotations:
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica
mismatch
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
expr: |
kube_deployment_spec_replicas{job="kube-state-metrics"}
!=
@ -382,6 +395,7 @@ spec:
annotations:
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} replica
mismatch
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
expr: |
kube_statefulset_status_replicas_ready{job="kube-state-metrics"}
!=
@ -393,6 +407,7 @@ spec:
annotations:
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} generation
mismatch
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
expr: |
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
!=
@ -404,6 +419,7 @@ spec:
annotations:
message: Only {{$value}}% of desired pods scheduled and ready for daemon set
{{$labels.namespace}}/{{$labels.daemonset}}
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
expr: |
kube_daemonset_status_number_ready{job="kube-state-metrics"}
/
@ -415,6 +431,7 @@ spec:
annotations:
message: A number of pods of daemonset {{$labels.namespace}}/{{$labels.daemonset}}
are not scheduled.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
expr: |
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
-
@ -426,17 +443,48 @@ spec:
annotations:
message: A number of pods of daemonset {{$labels.namespace}}/{{$labels.daemonset}}
are running where they are not supposed to run.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
expr: |
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
for: 10m
labels:
severity: warning
- alert: KubeCronJobRunning
annotations:
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking
more than 1h to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
expr: |
time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
for: 1h
labels:
severity: warning
- alert: KubeJobCompletion
annotations:
message: Job {{ $labels.namespace }}/{{ $labels.job }} is taking more than
1h to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
expr: |
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
for: 1h
labels:
severity: warning
- alert: KubeJobFailed
annotations:
message: Job {{ $labels.namespace }}/{{ $labels.job }} failed to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
expr: |
kube_job_status_failed{job="kube-state-metrics"} > 0
for: 1h
labels:
severity: warning
- name: kubernetes-resources
rules:
- alert: KubeCPUOvercommit
annotations:
message: Overcommitted CPU resource requests on Pods, cannot tolerate node
failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
expr: |
sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
/
@ -450,6 +498,7 @@ spec:
annotations:
message: Overcommitted Memory resource requests on Pods, cannot tolerate node
failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
expr: |
sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
/
@ -464,6 +513,7 @@ spec:
- alert: KubeCPUOvercommit
annotations:
message: Overcommitted CPU resource request quota on Namespaces.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.cpu"})
/
@ -475,6 +525,7 @@ spec:
- alert: KubeMemOvercommit
annotations:
message: Overcommited Memory resource request quota on Namespaces.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"})
/
@ -487,6 +538,7 @@ spec:
annotations:
message: '{{ printf "%0.0f" $value }}% usage of {{ $labels.resource }} in
namespace {{ $labels.namespace }}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
expr: |
100 * kube_resourcequota{job="kube-state-metrics", type="used"}
/ ignoring(instance, job, type)
@ -502,6 +554,7 @@ spec:
message: The persistent volume claimed by {{ $labels.persistentvolumeclaim
}} in namespace {{ $labels.namespace }} has {{ printf "%0.0f" $value }}%
free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical
expr: |
100 * kubelet_volume_stats_available_bytes{job="kubelet"}
/
@ -515,6 +568,7 @@ spec:
message: Based on recent sampling, the persistent volume claimed by {{ $labels.persistentvolumeclaim
}} in namespace {{ $labels.namespace }} is expected to fill up within four
days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays
expr: |
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[1h], 4 * 24 * 3600) < 0
for: 5m
@ -525,6 +579,7 @@ spec:
- alert: KubeNodeNotReady
annotations:
message: '{{ $labels.node }} has been unready for more than an hour'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
expr: |
kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
for: 1h
@ -534,6 +589,7 @@ spec:
annotations:
message: There are {{ $value }} different versions of Kubernetes components
running.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
expr: |
count(count(kubernetes_build_info{job!="kube-dns"}) by (gitVersion)) > 1
for: 1h
@ -543,6 +599,7 @@ spec:
annotations:
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
}}' is experiencing {{ printf "%0.0f" $value }}% errors.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
expr: |
sum(rate(rest_client_requests_total{code!~"2.."}[5m])) by (instance, job) * 100
/
@ -555,6 +612,7 @@ spec:
annotations:
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
}}' is experiencing {{ printf "%0.0f" $value }} errors / sec.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
expr: |
sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
for: 15m
@ -564,6 +622,7 @@ spec:
annotations:
message: Kubelet {{$labels.instance}} is running {{$value}} pods, close to
the limit of 110.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
expr: |
kubelet_running_pod_count{job="kubelet"} > 100
for: 15m
@ -573,6 +632,7 @@ spec:
annotations:
message: The API server has a 99th percentile latency of {{ $value }} seconds
for {{$labels.verb}} {{$labels.resource}}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: |
cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
for: 10m
@ -582,6 +642,7 @@ spec:
annotations:
message: The API server has a 99th percentile latency of {{ $value }} seconds
for {{$labels.verb}} {{$labels.resource}}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: |
cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
for: 10m
@ -590,6 +651,7 @@ spec:
- alert: KubeAPIErrorsHigh
annotations:
message: API server is erroring for {{ $value }}% of requests.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
/
@ -600,6 +662,7 @@ spec:
- alert: KubeAPIErrorsHigh
annotations:
message: API server is erroring for {{ $value }}% of requests.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
/
@ -610,6 +673,7 @@ spec:
- alert: KubeClientCertificateExpiration
annotations:
message: Kubernetes API certificate is expiring in less than 7 days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: |
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
labels:
@ -617,6 +681,7 @@ spec:
- alert: KubeClientCertificateExpiration
annotations:
message: Kubernetes API certificate is expiring in less than 1 day.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: |
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
labels:

View file

@ -1802,6 +1802,42 @@ spec:
proxyUrl:
description: Optional ProxyURL
type: string
queueConfig:
description: QueueConfig allows the tuning of remote_write queue_config
parameters. This object is referenced in the RemoteWriteSpec
object.
properties:
batchSendDeadline:
description: BatchSendDeadline is the maximum time a sample
will wait in buffer.
type: string
capacity:
description: Capacity is the number of samples to buffer per
shard before we start dropping them.
format: int32
type: integer
maxBackoff:
description: MaxBackoff is the maximum retry delay.
type: string
maxRetries:
description: MaxRetries is the maximum number of times to
retry a batch on recoverable errors.
format: int32
type: integer
maxSamplesPerSend:
description: MaxSamplesPerSend is the maximum number of samples
per send.
format: int32
type: integer
maxShards:
description: MaxShards is the maximum number of shards, i.e.
amount of concurrency.
format: int32
type: integer
minBackoff:
description: MinBackoff is the initial retry delay. Gets doubled
for every retry.
type: string
remoteTimeout:
description: Timeout for requests to the remote write endpoint.
type: string
@ -2673,6 +2709,77 @@ spec:
phase:
description: Phase represents the current phase of PersistentVolumeClaim.
type: string
thanos:
description: ThanosSpec defines parameters for a Prometheus server within
a Thanos deployment.
properties:
baseImage:
description: Thanos base image if other than default.
type: string
gcs:
description: ThanosGCSSpec defines parameters for use of Google
Cloud Storage (GCS) with Thanos.
properties:
bucket:
description: Google Cloud Storage bucket name for stored blocks.
If empty it won't store any block inside Google Cloud Storage.
type: string
peers:
description: Peers is a DNS name for Thanos to discover peers through.
type: string
s3:
description: ThanosSpec defines parameters for of AWS Simple Storage
Service (S3) with Thanos. (S3 compatible services apply as well)
properties:
accessKey:
description: SecretKeySelector selects a key of a Secret.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
optional:
description: Specify whether the Secret or it's key must
be defined
type: boolean
required:
- key
bucket:
description: S3-Compatible API bucket name for stored blocks.
type: string
endpoint:
description: S3-Compatible API endpoint for stored blocks.
type: string
insecure:
description: Whether to use an insecure connection with an S3-Compatible
API.
type: boolean
secretKey:
description: SecretKeySelector selects a key of a Secret.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
optional:
description: Specify whether the Secret or it's key must
be defined
type: boolean
required:
- key
signatureVersion2:
description: Whether to use S3 Signature Version 2; otherwise
Signature Version 4 will be used.
type: boolean
version:
description: Version describes the version of Thanos to use.
type: string
tolerations:
description: If specified, the pod's tolerations.
items:

View file

@ -0,0 +1,13 @@
# RoleBinding granting the "prometheus-self" Role to the "default"
# ServiceAccount in the "default" namespace, so the self-monitoring
# Prometheus pods (which do not declare a dedicated ServiceAccount in
# this example) can read the resources listed in that Role.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: prometheus-self
  namespace: default
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: prometheus-self
subjects:
# Bind to the namespace's default ServiceAccount.
- kind: ServiceAccount
  name: default
  namespace: default

View file

@ -0,0 +1,17 @@
# Role with read-only access to core discovery resources in the
# "default" namespace. Prometheus needs get/list/watch on these to
# perform Kubernetes service discovery of its scrape targets.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: prometheus-self
  namespace: default
rules:
- apiGroups:
  - ""  # "" denotes the core API group (nodes, services, endpoints, pods)
  resources:
  - nodes
  - services
  - endpoints
  - pods
  verbs:
  - get
  - list
  - watch

View file

@ -0,0 +1,15 @@
# Service exposing the web UI/API of the self-monitoring Prometheus
# pods on port 9090. No spec.type is set, so this is a ClusterIP
# service.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: prometheus
    prometheus: self
  name: prometheus-self
spec:
  ports:
  - name: web
    port: 9090
    protocol: TCP
    # targetPort refers to the container port named "web".
    targetPort: web
  selector:
    # NOTE(review): assumes the pods of the Prometheus resource named
    # "self" carry the "prometheus: self" label — confirm against the
    # operator-generated pod labels.
    prometheus: self

View file

@ -0,0 +1,13 @@
# ServiceMonitor selecting Services labeled "app: prometheus" and
# scraping their port named "web" every 30 seconds. Picked up by any
# Prometheus whose serviceMonitorSelector matches "app: prometheus".
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus
  labels:
    app: prometheus
spec:
  selector:
    matchLabels:
      app: prometheus
  endpoints:
  - port: web       # must match a named port on the selected Service
    interval: 30s   # scrape interval for this endpoint

View file

@ -0,0 +1,20 @@
# Prometheus resource "self": a 2-replica, Thanos-enabled Prometheus
# that scrapes itself via the "prometheus" ServiceMonitor.
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: self
  labels:
    prometheus: self
spec:
  podMetadata:
    labels:
      # Adds the pods to the Thanos gossip cluster: the thanos-peers
      # Service selects on this label.
      thanos-peer: 'true'
  replicas: 2
  serviceMonitorSelector:
    matchLabels:
      app: prometheus
  ruleSelector:
    matchLabels:
      role: prometheus-rulefiles
      # NOTE(review): this selects rules labeled "prometheus: k8s"
      # although this instance is named/labeled "self" — confirm this
      # is intentional and not a copy-paste leftover.
      prometheus: k8s
  thanos:
    # DNS name through which the Thanos sidecar discovers its peers.
    peers: thanos-peers.default.svc:10900

View file

@ -6,7 +6,7 @@ metadata:
app: thanos-query
thanos-peer: "true"
spec:
replicas: 2
replicas: 1
selector:
matchLabels:
app: thanos-query
@ -19,7 +19,7 @@ spec:
spec:
containers:
- name: thanos-query
image: improbable/thanos:latest
image: improbable/thanos:v0.1.0-rc.1
args:
- "query"
- "--log.level=debug"
@ -31,21 +31,4 @@ spec:
- name: grpc
containerPort: 10901
- name: cluster
containerPort: 10900
---
apiVersion: v1
kind: Service
metadata:
labels:
app: thanos-query
name: thanos-query
spec:
type: NodePort
selector:
app: thanos-query
ports:
- port: 9090
protocol: TCP
targetPort: http
name: http-query
nodePort: 31111
containerPort: 10900

View file

@ -0,0 +1,14 @@
# ClusterIP Service (no spec.type set) fronting the Thanos Query
# deployment's HTTP query endpoint on port 9090.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: thanos-query
  name: thanos-query
spec:
  selector:
    app: thanos-query
  ports:
  - port: 9090
    protocol: TCP
    # targetPort refers to the container port named "http".
    targetPort: http
    name: http-query

View file

@ -10,5 +10,4 @@ spec:
port: 10900
targetPort: cluster
selector:
# Useful endpoint for gathering all thanos components for common gossip cluster.
thanos-peer: "true"

File diff suppressed because one or more lines are too long

View file

@ -33,7 +33,7 @@ import (
const (
governingServiceName = "alertmanager-operated"
defaultVersion = "v0.14.0"
defaultVersion = "v0.15.0"
secretsDir = "/etc/alertmanager/secrets/"
alertmanagerConfDir = "/etc/alertmanager/config"
alertmanagerConfFile = alertmanagerConfDir + "/alertmanager.yaml"

View file

@ -191,6 +191,7 @@ func UnstructuredFromAlertmanager(a *Alertmanager) (*unstructured.Unstructured,
// necessary anymore.
unstructured.RemoveNestedField(r.Object, "metadata", "creationTimestamp")
unstructured.RemoveNestedField(r.Object, "spec", "storage", "volumeClaimTemplate", "metadata", "creationTimestamp")
unstructured.RemoveNestedField(r.Object, "spec", "podMetadata", "creationTimestamp")
return &r, nil
}

View file

@ -1043,11 +1043,17 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
Ref: ref("k8s.io/api/core/v1.SecretKeySelector"),
},
},
"thanos": {
SchemaProps: spec.SchemaProps{
Description: "Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment.\n\nThis section is experimental, it may change significantly without deprecation notice in any release.\n\nThis is experimental and may change significantly without backward compatibility in any release.",
Ref: ref("github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosSpec"),
},
},
},
},
},
Dependencies: []string{
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.AlertingSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RemoteReadSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RemoteWriteSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.StorageSpec", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/api/core/v1.SecretKeySelector", "k8s.io/api/core/v1.Toleration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"},
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.AlertingSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RemoteReadSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RemoteWriteSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.StorageSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosSpec", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/api/core/v1.SecretKeySelector", "k8s.io/api/core/v1.Toleration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"},
},
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.PrometheusStatus": {
Schema: spec.Schema{
@ -1095,6 +1101,65 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
},
Dependencies: []string{},
},
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.QueueConfig": {
Schema: spec.Schema{
SchemaProps: spec.SchemaProps{
Description: "QueueConfig allows the tuning of remote_write queue_config parameters. This object is referenced in the RemoteWriteSpec object.",
Properties: map[string]spec.Schema{
"capacity": {
SchemaProps: spec.SchemaProps{
Description: "Capacity is the number of samples to buffer per shard before we start dropping them.",
Type: []string{"integer"},
Format: "int32",
},
},
"maxShards": {
SchemaProps: spec.SchemaProps{
Description: "MaxShards is the maximum number of shards, i.e. amount of concurrency.",
Type: []string{"integer"},
Format: "int32",
},
},
"maxSamplesPerSend": {
SchemaProps: spec.SchemaProps{
Description: "MaxSamplesPerSend is the maximum number of samples per send.",
Type: []string{"integer"},
Format: "int32",
},
},
"batchSendDeadline": {
SchemaProps: spec.SchemaProps{
Description: "BatchSendDeadline is the maximum time a sample will wait in buffer.",
Type: []string{"string"},
Format: "",
},
},
"maxRetries": {
SchemaProps: spec.SchemaProps{
Description: "MaxRetries is the maximum number of times to retry a batch on recoverable errors.",
Type: []string{"integer"},
Format: "int32",
},
},
"minBackoff": {
SchemaProps: spec.SchemaProps{
Description: "MinBackoff is the initial retry delay. Gets doubled for every retry.",
Type: []string{"string"},
Format: "",
},
},
"maxBackoff": {
SchemaProps: spec.SchemaProps{
Description: "MaxBackoff is the maximum retry delay.",
Type: []string{"string"},
Format: "",
},
},
},
},
},
Dependencies: []string{},
},
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RelabelConfig": {
Schema: spec.Schema{
SchemaProps: spec.SchemaProps{
@ -1306,12 +1371,18 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
Format: "",
},
},
"queueConfig": {
SchemaProps: spec.SchemaProps{
Description: "QueueConfig allows tuning of the remote write queue parameters.",
Ref: ref("github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.QueueConfig"),
},
},
},
Required: []string{"url"},
},
},
Dependencies: []string{
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.BasicAuth", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RelabelConfig", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.TLSConfig"},
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.BasicAuth", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.QueueConfig", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RelabelConfig", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.TLSConfig"},
},
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.Rule": {
Schema: spec.Schema{
@ -1632,6 +1703,118 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
},
Dependencies: []string{},
},
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosGCSSpec": {
Schema: spec.Schema{
SchemaProps: spec.SchemaProps{
Description: "ThanosGCSSpec defines parameters for use of Google Cloud Storage (GCS) with Thanos.",
Properties: map[string]spec.Schema{
"bucket": {
SchemaProps: spec.SchemaProps{
Description: "Google Cloud Storage bucket name for stored blocks. If empty it won't store any block inside Google Cloud Storage.",
Type: []string{"string"},
Format: "",
},
},
},
},
},
Dependencies: []string{},
},
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosS3Spec": {
Schema: spec.Schema{
SchemaProps: spec.SchemaProps{
Description: "ThanosSpec defines parameters for of AWS Simple Storage Service (S3) with Thanos. (S3 compatible services apply as well)",
Properties: map[string]spec.Schema{
"bucket": {
SchemaProps: spec.SchemaProps{
Description: "S3-Compatible API bucket name for stored blocks.",
Type: []string{"string"},
Format: "",
},
},
"endpoint": {
SchemaProps: spec.SchemaProps{
Description: "S3-Compatible API endpoint for stored blocks.",
Type: []string{"string"},
Format: "",
},
},
"accessKey": {
SchemaProps: spec.SchemaProps{
Description: "AccessKey for an S3-Compatible API.",
Ref: ref("k8s.io/api/core/v1.SecretKeySelector"),
},
},
"secretKey": {
SchemaProps: spec.SchemaProps{
Description: "SecretKey for an S3-Compatible API.",
Ref: ref("k8s.io/api/core/v1.SecretKeySelector"),
},
},
"insecure": {
SchemaProps: spec.SchemaProps{
Description: "Whether to use an insecure connection with an S3-Compatible API.",
Type: []string{"boolean"},
Format: "",
},
},
"signatureVersion2": {
SchemaProps: spec.SchemaProps{
Description: "Whether to use S3 Signature Version 2; otherwise Signature Version 4 will be used.",
Type: []string{"boolean"},
Format: "",
},
},
},
},
},
Dependencies: []string{
"k8s.io/api/core/v1.SecretKeySelector"},
},
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosSpec": {
Schema: spec.Schema{
SchemaProps: spec.SchemaProps{
Description: "ThanosSpec defines parameters for a Prometheus server within a Thanos deployment.",
Properties: map[string]spec.Schema{
"peers": {
SchemaProps: spec.SchemaProps{
Description: "Peers is a DNS name for Thanos to discover peers through.",
Type: []string{"string"},
Format: "",
},
},
"version": {
SchemaProps: spec.SchemaProps{
Description: "Version describes the version of Thanos to use.",
Type: []string{"string"},
Format: "",
},
},
"baseImage": {
SchemaProps: spec.SchemaProps{
Description: "Thanos base image if other than default.",
Type: []string{"string"},
Format: "",
},
},
"gcs": {
SchemaProps: spec.SchemaProps{
Description: "GCS configures use of GCS in Thanos.",
Ref: ref("github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosGCSSpec"),
},
},
"s3": {
SchemaProps: spec.SchemaProps{
Description: "S3 configures use of S3 in Thanos.",
Ref: ref("github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosS3Spec"),
},
},
},
},
},
Dependencies: []string{
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosGCSSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosS3Spec"},
},
"k8s.io/api/core/v1.AWSElasticBlockStoreVolumeSource": {
Schema: spec.Schema{
SchemaProps: spec.SchemaProps{

View file

@ -190,6 +190,7 @@ func UnstructuredFromPrometheus(p *Prometheus) (*unstructured.Unstructured, erro
// necessary anymore.
unstructured.RemoveNestedField(r.Object, "metadata", "creationTimestamp")
unstructured.RemoveNestedField(r.Object, "spec", "storage", "volumeClaimTemplate", "metadata", "creationTimestamp")
unstructured.RemoveNestedField(r.Object, "spec", "podMetadata", "creationTimestamp")
return &r, nil
}

View file

@ -163,6 +163,15 @@ type PrometheusSpec struct {
// notes to ensure that no incompatible AlertManager configs are going to break
// Prometheus after the upgrade.
AdditionalAlertManagerConfigs *v1.SecretKeySelector `json:"additionalAlertManagerConfigs,omitempty"`
// Thanos configuration allows configuring various aspects of a Prometheus
// server in a Thanos environment.
//
// This section is experimental, it may change significantly without
// deprecation notice in any release.
//
// This is experimental and may change significantly without backward
// compatibility in any release.
Thanos *ThanosSpec `json:"thanos,omitempty"`
}
// Most recent observed status of the Prometheus cluster. Read-only. Not
@ -215,6 +224,48 @@ type StorageSpec struct {
VolumeClaimTemplate v1.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"`
}
// ThanosSpec defines parameters for a Prometheus server within a Thanos deployment.
// All fields are pointers with omitempty so an unset field is
// distinguishable from its zero value and is omitted when serialized.
// +k8s:openapi-gen=true
type ThanosSpec struct {
	// Peers is a DNS name for Thanos to discover peers through.
	Peers *string `json:"peers,omitempty"`
	// Version describes the version of Thanos to use.
	Version *string `json:"version,omitempty"`
	// Thanos base image if other than default.
	BaseImage *string `json:"baseImage,omitempty"`
	// GCS configures use of GCS in Thanos.
	GCS *ThanosGCSSpec `json:"gcs,omitempty"`
	// S3 configures use of S3 in Thanos.
	S3 *ThanosS3Spec `json:"s3,omitempty"`
}
// ThanosGCSSpec defines parameters for use of Google Cloud Storage (GCS) with
// Thanos.
// +k8s:openapi-gen=true
type ThanosGCSSpec struct {
	// Google Cloud Storage bucket name for stored blocks. If empty it won't
	// store any block inside Google Cloud Storage.
	// Pointer + omitempty: nil means "not configured" as opposed to "".
	Bucket *string `json:"bucket,omitempty"`
}
// ThanosS3Spec defines parameters for use of AWS Simple Storage Service (S3)
// with Thanos. (S3-compatible services apply as well.)
// +k8s:openapi-gen=true
type ThanosS3Spec struct {
	// S3-Compatible API bucket name for stored blocks.
	Bucket *string `json:"bucket,omitempty"`
	// S3-Compatible API endpoint for stored blocks.
	Endpoint *string `json:"endpoint,omitempty"`
	// AccessKey for an S3-Compatible API.
	// Referenced from a Secret rather than stored inline.
	AccessKey *v1.SecretKeySelector `json:"accessKey,omitempty"`
	// SecretKey for an S3-Compatible API.
	// Referenced from a Secret rather than stored inline.
	SecretKey *v1.SecretKeySelector `json:"secretKey,omitempty"`
	// Whether to use an insecure connection with an S3-Compatible API.
	Insecure *bool `json:"insecure,omitempty"`
	// Whether to use S3 Signature Version 2; otherwise Signature Version 4 will be used.
	SignatureVersion2 *bool `json:"signatureVersion2,omitempty"`
}
// RemoteWriteSpec defines the remote_write configuration for prometheus.
// +k8s:openapi-gen=true
type RemoteWriteSpec struct {
@ -234,6 +285,28 @@ type RemoteWriteSpec struct {
TLSConfig *TLSConfig `json:"tlsConfig,omitempty"`
//Optional ProxyURL
ProxyURL string `json:"proxyUrl,omitempty"`
// QueueConfig allows tuning of the remote write queue parameters.
QueueConfig *QueueConfig `json:"queueConfig,omitempty"`
}
// QueueConfig allows the tuning of remote_write queue_config parameters. This object
// is referenced in the RemoteWriteSpec object.
// Fields are value types (not pointers), so a zero/empty value is
// indistinguishable from "unset" and is treated as "use the default".
// +k8s:openapi-gen=true
type QueueConfig struct {
	// Capacity is the number of samples to buffer per shard before we start dropping them.
	Capacity int `json:"capacity,omitempty"`
	// MaxShards is the maximum number of shards, i.e. amount of concurrency.
	MaxShards int `json:"maxShards,omitempty"`
	// MaxSamplesPerSend is the maximum number of samples per send.
	MaxSamplesPerSend int `json:"maxSamplesPerSend,omitempty"`
	// BatchSendDeadline is the maximum time a sample will wait in buffer.
	// Presumably a Prometheus duration string (e.g. "5s") — confirm against
	// the remote_write queue_config documentation.
	BatchSendDeadline string `json:"batchSendDeadline,omitempty"`
	// MaxRetries is the maximum number of times to retry a batch on recoverable errors.
	MaxRetries int `json:"maxRetries,omitempty"`
	// MinBackoff is the initial retry delay. Gets doubled for every retry.
	MinBackoff string `json:"minBackoff,omitempty"`
	// MaxBackoff is the maximum retry delay.
	MaxBackoff string `json:"maxBackoff,omitempty"`
}
// RemoteReadSpec defines the remote_read configuration for prometheus.

View file

@ -671,6 +671,15 @@ func (in *PrometheusSpec) DeepCopyInto(out *PrometheusSpec) {
(*in).DeepCopyInto(*out)
}
}
if in.Thanos != nil {
in, out := &in.Thanos, &out.Thanos
if *in == nil {
*out = nil
} else {
*out = new(ThanosSpec)
(*in).DeepCopyInto(*out)
}
}
return
}
@ -700,6 +709,22 @@ func (in *PrometheusStatus) DeepCopy() *PrometheusStatus {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// QueueConfig contains only value-typed fields, so the plain struct
// assignment below is already a full deep copy.
func (in *QueueConfig) DeepCopyInto(out *QueueConfig) {
	*out = *in
	return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QueueConfig.
// Returns nil when the receiver is nil.
func (in *QueueConfig) DeepCopy() *QueueConfig {
	if in == nil {
		return nil
	}
	out := new(QueueConfig)
	in.DeepCopyInto(out)
	return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RelabelConfig) DeepCopyInto(out *RelabelConfig) {
*out = *in
@ -790,6 +815,15 @@ func (in *RemoteWriteSpec) DeepCopyInto(out *RemoteWriteSpec) {
**out = **in
}
}
if in.QueueConfig != nil {
in, out := &in.QueueConfig, &out.QueueConfig
if *in == nil {
*out = nil
} else {
*out = new(QueueConfig)
**out = **in
}
}
return
}
@ -986,3 +1020,159 @@ func (in *TLSConfig) DeepCopy() *TLSConfig {
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ThanosGCSSpec) DeepCopyInto(out *ThanosGCSSpec) {
	*out = *in
	if in.Bucket != nil {
		// Re-point out.Bucket at a fresh string so the copy does not
		// alias the receiver's pointer field.
		in, out := &in.Bucket, &out.Bucket
		if *in == nil {
			// Unreachable here (the outer check proved *in != nil);
			// kept as emitted by the deepcopy generator.
			*out = nil
		} else {
			*out = new(string)
			**out = **in
		}
	}
	return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ThanosGCSSpec.
// Returns nil when the receiver is nil.
func (in *ThanosGCSSpec) DeepCopy() *ThanosGCSSpec {
	if in == nil {
		return nil
	}
	out := new(ThanosGCSSpec)
	in.DeepCopyInto(out)
	return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// Every non-nil pointer field is re-allocated so out shares no memory
// with in; the SecretKeySelector fields delegate to their own
// DeepCopyInto. The inner "*in == nil" branches are unreachable (each
// outer check proves the pointer is non-nil) and are kept as emitted
// by the deepcopy generator.
func (in *ThanosS3Spec) DeepCopyInto(out *ThanosS3Spec) {
	*out = *in
	if in.Bucket != nil {
		in, out := &in.Bucket, &out.Bucket
		if *in == nil {
			*out = nil
		} else {
			*out = new(string)
			**out = **in
		}
	}
	if in.Endpoint != nil {
		in, out := &in.Endpoint, &out.Endpoint
		if *in == nil {
			*out = nil
		} else {
			*out = new(string)
			**out = **in
		}
	}
	if in.AccessKey != nil {
		in, out := &in.AccessKey, &out.AccessKey
		if *in == nil {
			*out = nil
		} else {
			*out = new(core_v1.SecretKeySelector)
			(*in).DeepCopyInto(*out)
		}
	}
	if in.SecretKey != nil {
		in, out := &in.SecretKey, &out.SecretKey
		if *in == nil {
			*out = nil
		} else {
			*out = new(core_v1.SecretKeySelector)
			(*in).DeepCopyInto(*out)
		}
	}
	if in.Insecure != nil {
		in, out := &in.Insecure, &out.Insecure
		if *in == nil {
			*out = nil
		} else {
			*out = new(bool)
			**out = **in
		}
	}
	if in.SignatureVersion2 != nil {
		in, out := &in.SignatureVersion2, &out.SignatureVersion2
		if *in == nil {
			*out = nil
		} else {
			*out = new(bool)
			**out = **in
		}
	}
	return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ThanosS3Spec.
// Returns nil when the receiver is nil.
func (in *ThanosS3Spec) DeepCopy() *ThanosS3Spec {
	if in == nil {
		return nil
	}
	out := new(ThanosS3Spec)
	in.DeepCopyInto(out)
	return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// Every non-nil pointer field is re-allocated so out shares no memory
// with in; the GCS and S3 sub-specs delegate to their own DeepCopyInto.
// The inner "*in == nil" branches are unreachable (each outer check
// proves the pointer is non-nil) and are kept as emitted by the
// deepcopy generator.
func (in *ThanosSpec) DeepCopyInto(out *ThanosSpec) {
	*out = *in
	if in.Peers != nil {
		in, out := &in.Peers, &out.Peers
		if *in == nil {
			*out = nil
		} else {
			*out = new(string)
			**out = **in
		}
	}
	if in.Version != nil {
		in, out := &in.Version, &out.Version
		if *in == nil {
			*out = nil
		} else {
			*out = new(string)
			**out = **in
		}
	}
	if in.BaseImage != nil {
		in, out := &in.BaseImage, &out.BaseImage
		if *in == nil {
			*out = nil
		} else {
			*out = new(string)
			**out = **in
		}
	}
	if in.GCS != nil {
		in, out := &in.GCS, &out.GCS
		if *in == nil {
			*out = nil
		} else {
			*out = new(ThanosGCSSpec)
			(*in).DeepCopyInto(*out)
		}
	}
	if in.S3 != nil {
		in, out := &in.S3, &out.S3
		if *in == nil {
			*out = nil
		} else {
			*out = new(ThanosS3Spec)
			(*in).DeepCopyInto(*out)
		}
	}
	return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ThanosSpec.
// Returns nil when the receiver is nil.
func (in *ThanosSpec) DeepCopy() *ThanosSpec {
	if in == nil {
		return nil
	}
	out := new(ThanosSpec)
	in.DeepCopyInto(out)
	return out
}

View file

@ -128,6 +128,7 @@ type Config struct {
PrometheusConfigReloader string
AlertmanagerDefaultBaseImage string
PrometheusDefaultBaseImage string
ThanosDefaultBaseImage string
Namespace string
Labels Labels
CrdGroup string

View file

@ -89,7 +89,7 @@ func buildExternalLabels(p *v1.Prometheus) yaml.MapSlice {
func generateConfig(p *v1.Prometheus, mons map[string]*v1.ServiceMonitor, basicAuthSecrets map[string]BasicAuthCredentials, additionalScrapeConfigs []byte, additionalAlertManagerConfigs []byte) ([]byte, error) {
versionStr := p.Spec.Version
if versionStr == "" {
versionStr = DefaultVersion
versionStr = DefaultPrometheusVersion
}
version, err := semver.Parse(strings.TrimLeft(versionStr, "v"))
@ -149,7 +149,7 @@ func generateConfig(p *v1.Prometheus, mons map[string]*v1.ServiceMonitor, basicA
var additionalScrapeConfigsYaml []yaml.MapSlice
err = yaml.Unmarshal([]byte(additionalScrapeConfigs), &additionalScrapeConfigsYaml)
if err != nil {
errors.Wrap(err, "unmarshalling additional scrape configs failed")
return nil, errors.Wrap(err, "unmarshalling additional scrape configs failed")
}
cfg = append(cfg, yaml.MapItem{
@ -160,7 +160,7 @@ func generateConfig(p *v1.Prometheus, mons map[string]*v1.ServiceMonitor, basicA
var additionalAlertManagerConfigsYaml []yaml.MapSlice
err = yaml.Unmarshal([]byte(additionalAlertManagerConfigs), &additionalAlertManagerConfigsYaml)
if err != nil {
errors.Wrap(err, "unmarshalling additional alert manager configs failed")
return nil, errors.Wrap(err, "unmarshalling additional alert manager configs failed")
}
alertmanagerConfigs = append(alertmanagerConfigs, additionalAlertManagerConfigsYaml...)
@ -710,6 +710,40 @@ func generateRemoteWriteConfig(version semver.Version, specs []v1.RemoteWriteSpe
cfg = append(cfg, yaml.MapItem{Key: "proxy_url", Value: spec.ProxyURL})
}
if spec.QueueConfig != nil {
queueConfig := yaml.MapSlice{}
if spec.QueueConfig.Capacity != int(0) {
queueConfig = append(queueConfig, yaml.MapItem{Key: "capacity", Value: spec.QueueConfig.Capacity})
}
if spec.QueueConfig.MaxShards != int(0) {
queueConfig = append(queueConfig, yaml.MapItem{Key: "max_shards", Value: spec.QueueConfig.MaxShards})
}
if spec.QueueConfig.MaxSamplesPerSend != int(0) {
queueConfig = append(queueConfig, yaml.MapItem{Key: "max_samples_per_send", Value: spec.QueueConfig.MaxSamplesPerSend})
}
if spec.QueueConfig.BatchSendDeadline != "" {
queueConfig = append(queueConfig, yaml.MapItem{Key: "batch_send_deadline", Value: spec.QueueConfig.BatchSendDeadline})
}
if spec.QueueConfig.MaxRetries != int(0) {
queueConfig = append(queueConfig, yaml.MapItem{Key: "max_retries", Value: spec.QueueConfig.MaxRetries})
}
if spec.QueueConfig.MinBackoff != "" {
queueConfig = append(queueConfig, yaml.MapItem{Key: "min_backoff", Value: spec.QueueConfig.MinBackoff})
}
if spec.QueueConfig.MaxBackoff != "" {
queueConfig = append(queueConfig, yaml.MapItem{Key: "max_backoff", Value: spec.QueueConfig.MaxBackoff})
}
cfg = append(cfg, yaml.MapItem{Key: "queue_config", Value: queueConfig})
}
cfgs = append(cfgs, cfg)
}

View file

@ -32,17 +32,18 @@ import (
)
const (
governingServiceName = "prometheus-operated"
DefaultVersion = "v2.2.1"
defaultRetention = "24h"
storageDir = "/prometheus"
confDir = "/etc/prometheus/config"
confOutDir = "/etc/prometheus/config_out"
rulesDir = "/etc/prometheus/rules"
secretsDir = "/etc/prometheus/secrets/"
configFilename = "prometheus.yaml"
configEnvsubstFilename = "prometheus.env.yaml"
sSetInputChecksumName = "prometheus-operator-input-checksum"
governingServiceName = "prometheus-operated"
DefaultPrometheusVersion = "v2.3.1"
DefaultThanosVersion = "v0.1.0-rc.1"
defaultRetention = "24h"
storageDir = "/prometheus"
confDir = "/etc/prometheus/config"
confOutDir = "/etc/prometheus/config_out"
rulesDir = "/etc/prometheus/rules"
secretsDir = "/etc/prometheus/secrets/"
configFilename = "prometheus.yaml"
configEnvsubstFilename = "prometheus.env.yaml"
sSetInputChecksumName = "prometheus-operator-input-checksum"
)
var (
@ -88,7 +89,11 @@ func makeStatefulSet(
p.Spec.BaseImage = config.PrometheusDefaultBaseImage
}
if p.Spec.Version == "" {
p.Spec.Version = DefaultVersion
p.Spec.Version = DefaultPrometheusVersion
}
if p.Spec.Thanos != nil && p.Spec.Thanos.Version == nil {
v := DefaultThanosVersion
p.Spec.Thanos.Version = &v
}
versionStr := strings.TrimLeft(p.Spec.Version, "v")
@ -542,6 +547,86 @@ func makeStatefulSetSpec(p monitoringv1.Prometheus, c *Config) (*appsv1.Stateful
finalLabels := c.Labels.Merge(podLabels)
additionalContainers := p.Spec.Containers
if p.Spec.Thanos != nil {
thanosBaseImage := c.ThanosDefaultBaseImage
if p.Spec.Thanos.BaseImage != nil {
thanosBaseImage = *p.Spec.Thanos.BaseImage
}
thanosArgs := []string{"sidecar"}
if p.Spec.Thanos.Peers != nil {
thanosArgs = append(thanosArgs, fmt.Sprintf("--cluster.peers=%s", *p.Spec.Thanos.Peers))
}
if p.Spec.LogLevel != "" && p.Spec.LogLevel != "info" {
thanosArgs = append(thanosArgs, fmt.Sprintf("--log.level=%s", p.Spec.LogLevel))
}
if p.Spec.Thanos.GCS != nil {
if p.Spec.Thanos.GCS.Bucket != nil {
thanosArgs = append(thanosArgs, fmt.Sprintf("--gcs.bucket=%s", *p.Spec.Thanos.GCS.Bucket))
}
}
envVars := []v1.EnvVar{}
if p.Spec.Thanos.S3 != nil {
if p.Spec.Thanos.S3.Bucket != nil {
thanosArgs = append(thanosArgs, fmt.Sprintf("--s3.bucket=%s", *p.Spec.Thanos.S3.Bucket))
}
if p.Spec.Thanos.S3.Endpoint != nil {
thanosArgs = append(thanosArgs, fmt.Sprintf("--s3.endpoint=%s", *p.Spec.Thanos.S3.Endpoint))
}
if p.Spec.Thanos.S3.Insecure != nil && *p.Spec.Thanos.S3.Insecure {
thanosArgs = append(thanosArgs, "--s3.insecure")
}
if p.Spec.Thanos.S3.SignatureVersion2 != nil && *p.Spec.Thanos.S3.SignatureVersion2 {
thanosArgs = append(thanosArgs, "--s3.signature-version2")
}
if p.Spec.Thanos.S3.AccessKey != nil {
envVars = append(envVars, v1.EnvVar{
Name: "S3_ACCESS_KEY",
ValueFrom: &v1.EnvVarSource{
SecretKeyRef: p.Spec.Thanos.S3.AccessKey,
},
})
}
if p.Spec.Thanos.S3.SecretKey != nil {
envVars = append(envVars, v1.EnvVar{
Name: "S3_SECRET_KEY",
ValueFrom: &v1.EnvVarSource{
SecretKeyRef: p.Spec.Thanos.S3.SecretKey,
},
})
}
}
c := v1.Container{
Name: "thanos-sidecar",
Image: thanosBaseImage + ":" + *p.Spec.Thanos.Version,
Args: thanosArgs,
Ports: []v1.ContainerPort{
{
Name: "http",
ContainerPort: 10902,
},
{
Name: "grpc",
ContainerPort: 10901,
},
{
Name: "cluster",
ContainerPort: 10900,
},
},
Env: envVars,
}
additionalContainers = append(additionalContainers, c)
promArgs = append(promArgs, "--storage.tsdb.min-block-duration=2h", "--storage.tsdb.max-block-duration=2h")
}
return &appsv1.StatefulSetSpec{
ServiceName: governingServiceName,
Replicas: p.Spec.Replicas,
@ -610,7 +695,7 @@ func makeStatefulSetSpec(p monitoringv1.Prometheus, c *Config) (*appsv1.Stateful
},
},
},
}, p.Spec.Containers...),
}, additionalContainers...),
SecurityContext: securityContext,
ServiceAccountName: p.Spec.ServiceAccountName,
NodeSelector: p.Spec.NodeSelector,

View file

@ -205,7 +205,7 @@ func TestStatefulSetVolumeInitial(t *testing.T) {
VolumeSource: v1.VolumeSource{
ConfigMap: &v1.ConfigMapVolumeSource{
LocalObjectReference: v1.LocalObjectReference{
Name: "prometheus-volume-init-test-rules",
Name: "prometheus-volume-init-test-rulefiles",
},
},
},

View file

@ -15,40 +15,19 @@ if [[ "${TRAVIS_PULL_REQUEST}" != "false" ]]; then
exit 0
fi
# Builds both prometheus-operator and prometheus-config-reloader
make crossbuild
#
# prometheus-operator
#
export REPO=quay.io/coreos/prometheus-operator
# Push to Quay '-dev' repo if not a git tag or master branch build
export REPO="quay.io/coreos/prometheus-operator"
export REPO_PROMETHEUS_CONFIG_RELOADER="quay.io/coreos/prometheus-config-reloader"
if [[ "${TRAVIS_TAG}" == "" ]] && [[ "${TRAVIS_BRANCH}" != master ]]; then
export REPO="${REPO}-dev"
export REPO="quay.io/coreos/prometheus-operator-dev"
export REPO_PROMETHEUS_CONFIG_RELOADER="quay.io/coreos/prometheus-config-reloader-dev"
fi
# For both git tags and git branches 'TRAVIS_BRANCH' contains the name.
export TAG="${TRAVIS_BRANCH}"
make container
make image
echo "${QUAY_PASSWORD}" | docker login -u "${QUAY_USERNAME}" --password-stdin quay.io
docker push "${REPO}:${TRAVIS_BRANCH}"
#
# prometheus-config-reloader
#
cd contrib/prometheus-config-reloader
export REPO=quay.io/coreos/prometheus-config-reloader
# Push to Quay '-dev' repo if not a git tag or master branch build
if [[ "${TRAVIS_TAG}" == "" ]] && [[ "${TRAVIS_BRANCH}" != master ]]; then
export REPO="${REPO}-dev"
fi
# For both git tags and git branches 'TRAVIS_BRANCH' contains the name.
export TAG="${TRAVIS_BRANCH}"
make container
echo "${QUAY_PASSWORD}" | docker login -u "${QUAY_USERNAME}" --password-stdin quay.io
docker push "${REPO}:${TRAVIS_BRANCH}"
docker push "${REPO}:${TAG}"
docker push "${REPO_PROMETHEUS_CONFIG_RELOADER}:${TAG}"

View file

@ -1070,6 +1070,107 @@ func TestPromOpMatchPromAndServMonInDiffNSs(t *testing.T) {
}
}
// TestThanos is an e2e test: it deploys a 2-replica Prometheus with the
// Thanos sidecar enabled and a standalone Thanos querier joined to the same
// gossip peer service, then queries the querier until it returns the
// `prometheus_build_info` series from every replica via every replica.
func TestThanos(t *testing.T) {
	t.Parallel()
	ctx := framework.NewTestCtx(t)
	defer ctx.Cleanup(t)
	ns := ctx.CreateNamespace(t, framework.KubeClient)
	ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)

	peerServiceName := "thanos-peers"
	querierServiceName := "thanos-querier"
	basicPrometheus := framework.MakeBasicPrometheus(ns, "basic-prometheus", "test-group", 1)
	// Gossip peer address; 10900 is the sidecar's cluster port.
	peerServiceDNS := fmt.Sprintf("%s.%s.svc:10900", peerServiceName, ns)
	version := "v0.1.0-rc.1"
	basicPrometheus.Spec.Thanos = &monitoringv1.ThanosSpec{
		Peers:   &peerServiceDNS,
		Version: &version,
	}
	// The peer Service selects pods by this label (see MakeThanosService).
	basicPrometheus.Spec.PodMetadata = &metav1.ObjectMeta{
		Labels: map[string]string{
			"thanos-peer": "true",
		},
	}
	replicas := int32(2)
	basicPrometheus.Spec.Replicas = &replicas
	pservice := framework.MakePrometheusService(basicPrometheus.Name, "test-group", v1.ServiceTypeClusterIP)
	tservice := framework.MakeThanosService(peerServiceName)
	qservice := framework.MakeThanosQuerierService(querierServiceName)
	s := framework.MakeBasicServiceMonitor("test-group")

	// Stand up the Thanos querier from the example manifest, overriding its
	// args so it dedups on prometheus_replica and joins our peer service.
	thanosQuerier, err := testFramework.MakeDeployment("../../example/thanos/querier-deployment.yaml")
	if err != nil {
		t.Fatal("Making deployment failed: ", err)
	}
	querierArgs := []string{
		"query",
		"--log.level=debug",
		"--query.replica-label=prometheus_replica",
		fmt.Sprintf("--cluster.peers=%s", peerServiceDNS),
	}
	log.Println("setting up querier with args: ", querierArgs)
	thanosQuerier.Spec.Template.Spec.Containers[0].Args = querierArgs
	if err := testFramework.CreateDeployment(framework.KubeClient, ns, thanosQuerier); err != nil {
		t.Fatal("Creating Thanos querier failed: ", err)
	}
	if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, qservice); err != nil {
		t.Fatal("Creating Thanos querier service failed: ", err)
	}
	if _, err := framework.MonClientV1.ServiceMonitors(ns).Create(s); err != nil {
		t.Fatal("Creating ServiceMonitor failed: ", err)
	}
	if _, err := framework.KubeClient.CoreV1().Services(ns).Create(pservice); err != nil {
		t.Fatal("Creating prometheus service failed: ", err)
	}
	if _, err := framework.MonClientV1.Prometheuses(ns).Create(basicPrometheus); err != nil {
		t.Fatal("Creating prometheus failed: ", err)
	}
	if _, err := framework.KubeClient.CoreV1().Services(ns).Create(tservice); err != nil {
		t.Fatal("Creating prometheus service failed: ", err)
	}

	// Poll the querier through the apiserver proxy. dedup=false so that one
	// series per (scraping replica, scraped replica) pair is returned.
	err = wait.Poll(5*time.Second, 5*time.Minute, func() (bool, error) {
		proxyGet := framework.KubeClient.CoreV1().Services(ns).ProxyGet
		request := proxyGet("http", querierServiceName, "http-query", "/api/v1/query", map[string]string{"query": "prometheus_build_info", "dedup": "false"})
		b, err := request.DoRaw()
		if err != nil {
			log.Println(fmt.Sprintf("Error performing request against Thanos querier: %v\n\nretrying...", err))
			return false, nil
		}

		// Only the result count matters; decode just data.result.
		d := struct {
			Data struct {
				Result []map[string]interface{} `json:"result"`
			} `json:"data"`
		}{}

		err = json.Unmarshal(b, &d)
		if err != nil {
			return false, err
		}

		result := len(d.Data.Result)
		// We're expecting 4 results as we are requesting the
		// `prometheus_build_info` metric, which is collected for both
		// Prometheus replicas by both replicas.
		expected := 4
		if result != expected {
			log.Printf("Unexpected number of results from query. Got %d, expected %d. retrying...\n", result, expected)
			return false, nil
		}
		return true, nil
	})
	if err != nil {
		t.Fatal("Failed to get correct result from Thanos querier: ", err)
	}
}
func isDiscoveryWorking(ns, svcName, prometheusName string) func() (bool, error) {
return func() (bool, error) {
pods, err := framework.KubeClient.CoreV1().Pods(ns).List(prometheus.ListOptions(prometheusName))

View file

@ -41,7 +41,7 @@ func (f *Framework) MakeBasicPrometheus(ns, name, group string, replicas int32)
},
Spec: monitoringv1.PrometheusSpec{
Replicas: &replicas,
Version: prometheus.DefaultVersion,
Version: prometheus.DefaultPrometheusVersion,
ServiceMonitorSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"group": group,
@ -147,6 +147,48 @@ func (f *Framework) MakePrometheusService(name, group string, serviceType v1.Ser
return service
}
// MakeThanosQuerierService returns a Service named name exposing the Thanos
// querier HTTP query API: port 10902 forwarding to the container port named
// "http", selecting pods labeled app=thanos-query.
func (f *Framework) MakeThanosQuerierService(name string) *v1.Service {
	// gofmt -s: the v1.ServicePort element type is elided inside the
	// []v1.ServicePort literal.
	service := &v1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name: name,
		},
		Spec: v1.ServiceSpec{
			Ports: []v1.ServicePort{
				{
					Name:       "http-query",
					Port:       10902,
					TargetPort: intstr.FromString("http"),
				},
			},
			Selector: map[string]string{
				"app": "thanos-query",
			},
		},
	}

	return service
}
// MakeThanosService returns a Service named name for Thanos gossip peering:
// port 10900 forwarding to the container port named "cluster", selecting all
// pods labeled thanos-peer=true.
func (f *Framework) MakeThanosService(name string) *v1.Service {
	// gofmt -s: the v1.ServicePort element type is elided inside the
	// []v1.ServicePort literal.
	service := &v1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name: name,
		},
		Spec: v1.ServiceSpec{
			Ports: []v1.ServicePort{
				{
					Name:       "cluster",
					Port:       10900,
					TargetPort: intstr.FromString("cluster"),
				},
			},
			Selector: map[string]string{
				"thanos-peer": "true",
			},
		},
	}

	return service
}
func (f *Framework) CreatePrometheusAndWaitUntilReady(ns string, p *monitoringv1.Prometheus) error {
_, err := f.MonClientV1.Prometheuses(ns).Create(p)
if err != nil {