mirror of
https://github.com/prometheus-operator/prometheus-operator.git
synced 2025-04-21 03:38:43 +00:00
Merge branch 'master' into make-folder
This commit is contained in:
commit
2741a7cb09
39 changed files with 1188 additions and 183 deletions
Documentation
Makefilecmd/operator
contrib/kube-prometheus
experimental/thanos
jsonnet/kube-prometheus
alertmanager
kube-state-metrics
node-exporter
prometheus
manifests
example
prometheus-operator-crd
thanos
jsonnet/prometheus-operator
pkg
alertmanager
client/monitoring/v1
prometheus
scripts
test
|
@ -26,6 +26,7 @@ This Document documents the types introduced by the Prometheus Operator to be co
|
|||
* [PrometheusRuleSpec](#prometheusrulespec)
|
||||
* [PrometheusSpec](#prometheusspec)
|
||||
* [PrometheusStatus](#prometheusstatus)
|
||||
* [QueueConfig](#queueconfig)
|
||||
* [RelabelConfig](#relabelconfig)
|
||||
* [RemoteReadSpec](#remotereadspec)
|
||||
* [RemoteWriteSpec](#remotewritespec)
|
||||
|
@ -36,6 +37,9 @@ This Document documents the types introduced by the Prometheus Operator to be co
|
|||
* [ServiceMonitorSpec](#servicemonitorspec)
|
||||
* [StorageSpec](#storagespec)
|
||||
* [TLSConfig](#tlsconfig)
|
||||
* [ThanosGCSSpec](#thanosgcsspec)
|
||||
* [ThanosS3Spec](#thanoss3spec)
|
||||
* [ThanosSpec](#thanosspec)
|
||||
|
||||
## AlertingSpec
|
||||
|
||||
|
@ -265,6 +269,7 @@ Specification of the desired behavior of the Prometheus cluster. More info: http
|
|||
| containers | Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to a Prometheus pod. | []v1.Container | false |
|
||||
| additionalScrapeConfigs | AdditionalScrapeConfigs allows specifying a key of a Secret containing additional Prometheus scrape configurations. Scrape configurations specified are appended to the configurations generated by the Prometheus Operator. Job configurations specified must have the form as specified in the official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#<scrape_config>. As scrape configs are appended, the user is responsible to make sure it is valid. Note that using this feature may expose the possibility to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible scrape configs are going to break Prometheus after the upgrade. | *[v1.SecretKeySelector](https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#secretkeyselector-v1-core) | false |
|
||||
| additionalAlertManagerConfigs | AdditionalAlertManagerConfigs allows specifying a key of a Secret containing additional Prometheus AlertManager configurations. AlertManager configurations specified are appended to the configurations generated by the Prometheus Operator. Job configurations specified must have the form as specified in the official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#<alertmanager_config>. As AlertManager configs are appended, the user is responsible to make sure it is valid. Note that using this feature may expose the possibility to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible AlertManager configs are going to break Prometheus after the upgrade. | *[v1.SecretKeySelector](https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#secretkeyselector-v1-core) | false |
|
||||
| thanos | Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment.\n\nThis section is experimental, it may change significantly without deprecation notice in any release.\n\nThis is experimental and may change significantly without backward compatibility in any release. | *[ThanosSpec](#thanosspec) | false |
|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
|
@ -282,6 +287,22 @@ Most recent observed status of the Prometheus cluster. Read-only. Not included w
|
|||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
## QueueConfig
|
||||
|
||||
QueueConfig allows the tuning of remote_write queue_config parameters. This object is referenced in the RemoteWriteSpec object.
|
||||
|
||||
| Field | Description | Scheme | Required |
|
||||
| ----- | ----------- | ------ | -------- |
|
||||
| capacity | Capacity is the number of samples to buffer per shard before we start dropping them. | int | false |
|
||||
| maxShards | MaxShards is the maximum number of shards, i.e. amount of concurrency. | int | false |
|
||||
| maxSamplesPerSend | MaxSamplesPerSend is the maximum number of samples per send. | int | false |
|
||||
| batchSendDeadline | BatchSendDeadline is the maximum time a sample will wait in buffer. | string | false |
|
||||
| maxRetries | MaxRetries is the maximum number of times to retry a batch on recoverable errors. | int | false |
|
||||
| minBackoff | MinBackoff is the initial retry delay. Gets doubled for every retry. | string | false |
|
||||
| maxBackoff | MaxBackoff is the maximum retry delay. | string | false |
|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
## RelabelConfig
|
||||
|
||||
RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs
|
||||
|
@ -330,6 +351,7 @@ RemoteWriteSpec defines the remote_write configuration for prometheus.
|
|||
| bearerTokenFile | File to read bearer token for remote write. | string | false |
|
||||
| tlsConfig | TLS Config to use for remote write. | *[TLSConfig](#tlsconfig) | false |
|
||||
| proxyUrl | Optional ProxyURL | string | false |
|
||||
| queueConfig | QueueConfig allows tuning of the remote write queue parameters. | *[QueueConfig](#queueconfig) | false |
|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
|
@ -423,3 +445,42 @@ TLSConfig specifies TLS configuration parameters.
|
|||
| insecureSkipVerify | Disable target certificate validation. | bool | false |
|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
## ThanosGCSSpec
|
||||
|
||||
ThanosGCSSpec defines parameters for use of Google Cloud Storage (GCS) with Thanos.
|
||||
|
||||
| Field | Description | Scheme | Required |
|
||||
| ----- | ----------- | ------ | -------- |
|
||||
| bucket | Google Cloud Storage bucket name for stored blocks. If empty it won't store any block inside Google Cloud Storage. | *string | false |
|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
## ThanosS3Spec
|
||||
|
||||
ThanosSpec defines parameters for of AWS Simple Storage Service (S3) with Thanos. (S3 compatible services apply as well)
|
||||
|
||||
| Field | Description | Scheme | Required |
|
||||
| ----- | ----------- | ------ | -------- |
|
||||
| bucket | S3-Compatible API bucket name for stored blocks. | *string | false |
|
||||
| endpoint | S3-Compatible API endpoint for stored blocks. | *string | false |
|
||||
| accessKey | AccessKey for an S3-Compatible API. | *[v1.SecretKeySelector](https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#secretkeyselector-v1-core) | false |
|
||||
| secretKey | SecretKey for an S3-Compatible API. | *[v1.SecretKeySelector](https://v1-6.docs.kubernetes.io/docs/api-reference/v1.6/#secretkeyselector-v1-core) | false |
|
||||
| insecure | Whether to use an insecure connection with an S3-Compatible API. | *bool | false |
|
||||
| signatureVersion2 | Whether to use S3 Signature Version 2; otherwise Signature Version 4 will be used. | *bool | false |
|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
||||
## ThanosSpec
|
||||
|
||||
ThanosSpec defines parameters for a Prometheus server within a Thanos deployment.
|
||||
|
||||
| Field | Description | Scheme | Required |
|
||||
| ----- | ----------- | ------ | -------- |
|
||||
| peers | Peers is a DNS name for Thanos to discover peers through. | *string | false |
|
||||
| version | Version describes the version of Thanos to use. | *string | false |
|
||||
| baseImage | Thanos base image if other than default. | *string | false |
|
||||
| gcs | GCS configures use of GCS in Thanos. | *[ThanosGCSSpec](#thanosgcsspec) | true |
|
||||
| s3 | S3 configures use of S3 in Thanos. | *[ThanosS3Spec](#thanoss3spec) | true |
|
||||
|
||||
[Back to TOC](#table-of-contents)
|
||||
|
|
|
@ -181,7 +181,7 @@ spec:
|
|||
- args:
|
||||
- --secure-listen-address=:9100
|
||||
- --upstream=http://127.0.0.1:9101/
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
|
||||
name: kube-rbac-proxy
|
||||
ports:
|
||||
- containerPort: 9100
|
||||
|
@ -272,7 +272,7 @@ spec:
|
|||
- args:
|
||||
- --secure-listen-address=:8443
|
||||
- --upstream=http://127.0.0.1:8081/
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
|
||||
name: kube-rbac-proxy-main
|
||||
ports:
|
||||
- containerPort: 8443
|
||||
|
@ -287,7 +287,7 @@ spec:
|
|||
- args:
|
||||
- --secure-listen-address=:9443
|
||||
- --upstream=http://127.0.0.1:8082/
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
|
||||
name: kube-rbac-proxy-self
|
||||
ports:
|
||||
- containerPort: 9443
|
||||
|
@ -304,7 +304,7 @@ spec:
|
|||
- --port=8081
|
||||
- --telemetry-host=127.0.0.1
|
||||
- --telemetry-port=8082
|
||||
image: quay.io/coreos/kube-state-metrics:v1.3.0
|
||||
image: quay.io/coreos/kube-state-metrics:v1.3.1
|
||||
name: kube-state-metrics
|
||||
resources:
|
||||
limits:
|
||||
|
@ -411,7 +411,7 @@ spec:
|
|||
matchExpressions:
|
||||
- key: k8s-app
|
||||
operator: Exists
|
||||
version: v2.2.1
|
||||
version: v2.3.1
|
||||
```
|
||||
|
||||
> Make sure that the `ServiceAccount` called `prometheus-k8s` exists and if using RBAC, is bound to the correct role. Read more on [RBAC when using the Prometheus Operator](../rbac.md).
|
||||
|
@ -595,7 +595,7 @@ spec:
|
|||
beta.kubernetes.io/os: linux
|
||||
replicas: 3
|
||||
serviceAccountName: alertmanager-main
|
||||
version: v0.14.0
|
||||
version: v0.15.0
|
||||
```
|
||||
|
||||
Read more in the [alerting guide](alerting.md) on how to configure the Alertmanager as it will not spin up unless it has a valid configuration mounted through a `Secret`. Note that the `Secret` has to be in the same namespace as the `Alertmanager` resource as well as have the name `alertmanager-<name-of-alertmanager-object` and the key of the configuration is `alertmanager.yaml`.
|
||||
|
|
7
Makefile
7
Makefile
|
@ -1,6 +1,7 @@
|
|||
SHELL=/bin/bash -o pipefail
|
||||
|
||||
REPO?=quay.io/coreos/prometheus-operator
|
||||
REPO_PROMETHEUS_CONFIG_RELOADER?=quay.io/coreos/prometheus-config-reloader
|
||||
TAG?=$(shell git rev-parse --short HEAD)
|
||||
|
||||
PO_CRDGEN_BINARY:=$(GOPATH)/bin/po-crdgen
|
||||
|
@ -36,7 +37,7 @@ prometheus-config-reloader:
|
|||
-ldflags "-X github.com/coreos/prometheus-operator/pkg/version.Version=$(shell cat VERSION)" \
|
||||
-o $@ cmd/$@/main.go
|
||||
|
||||
pkg/client/monitoring/v1/zz_generated.deepcopy.go: $(DEEPCOPY_GEN_BINARY)
|
||||
pkg/client/monitoring/v1/zz_generated.deepcopy.go: .header pkg/client/monitoring/v1/types.go $(DEEPCOPY_GEN_BINARY)
|
||||
$(DEEPCOPY_GEN_BINARY) \
|
||||
-i github.com/coreos/prometheus-operator/pkg/client/monitoring/v1 \
|
||||
--go-header-file="$(GOPATH)/src/github.com/coreos/prometheus-operator/.header" \
|
||||
|
@ -68,7 +69,7 @@ hack/prometheus-config-reloader-image: cmd/prometheus-config-reloader/Dockerfile
|
|||
# Create empty target file, for the sole purpose of recording when this target
|
||||
# was last executed via the last-modification timestamp on the file. See
|
||||
# https://www.gnu.org/software/make/manual/make.html#Empty-Targets
|
||||
docker build -t quay.io/coreos/prometheus-config-reloader:$(TAG) -f cmd/prometheus-config-reloader/Dockerfile .
|
||||
docker build -t $(REPO_PROMETHEUS_CONFIG_RELOADER):$(TAG) -f cmd/prometheus-config-reloader/Dockerfile .
|
||||
touch $@
|
||||
|
||||
|
||||
|
@ -77,7 +78,7 @@ hack/prometheus-config-reloader-image: cmd/prometheus-config-reloader/Dockerfile
|
|||
##############
|
||||
|
||||
.PHONY: generate
|
||||
generate: Documentation/*
|
||||
generate: pkg/client/monitoring/v1/zz_generated.deepcopy.go pkg/client/monitoring/v1/openapi_generated.go kube-prometheus Documentation/*
|
||||
|
||||
.PHONY: generate-in-docker
|
||||
generate-in-docker: hack/jsonnet-docker-image
|
||||
|
|
|
@ -78,6 +78,7 @@ func init() {
|
|||
flagset.StringVar(&cfg.ConfigReloaderImage, "config-reloader-image", "quay.io/coreos/configmap-reload:v0.0.1", "Reload Image")
|
||||
flagset.StringVar(&cfg.AlertmanagerDefaultBaseImage, "alertmanager-default-base-image", "quay.io/prometheus/alertmanager", "Alertmanager default base image")
|
||||
flagset.StringVar(&cfg.PrometheusDefaultBaseImage, "prometheus-default-base-image", "quay.io/prometheus/prometheus", "Prometheus default base image")
|
||||
flagset.StringVar(&cfg.ThanosDefaultBaseImage, "thanos-default-base-image", "improbable/thanos", "Thanos default base image")
|
||||
flagset.StringVar(&cfg.Namespace, "namespace", v1.NamespaceAll, "Namespace to scope the interaction of the Prometheus Operator and the apiserver.")
|
||||
flagset.Var(&cfg.Labels, "labels", "Labels to be add to all resources created by the operator")
|
||||
flagset.StringVar(&cfg.CrdGroup, "crd-apigroup", monitoringv1.Group, "prometheus CRD API group name")
|
||||
|
|
|
@ -1,73 +0,0 @@
|
|||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: Prometheus
|
||||
metadata:
|
||||
name: self
|
||||
labels:
|
||||
prometheus: self
|
||||
spec:
|
||||
podMetadata:
|
||||
labels:
|
||||
thanos-peer: 'true'
|
||||
replicas: 2
|
||||
version: v2.2.1
|
||||
serviceAccountName: prometheus-k8s
|
||||
serviceMonitorSelector:
|
||||
matchLabels:
|
||||
app: prometheus
|
||||
ruleSelector:
|
||||
matchLabels:
|
||||
role: prometheus-rulefiles
|
||||
prometheus: k8s
|
||||
resources:
|
||||
requests:
|
||||
# 2Gi is default, but won't schedule if you don't have a node with >2Gi
|
||||
# memory. Modify based on your target and time-series count for
|
||||
# production use. This value is mainly meant for demonstration/testing
|
||||
# purposes.
|
||||
memory: 400Mi
|
||||
containers:
|
||||
- name: thanos
|
||||
image: improbable/thanos:latest
|
||||
args:
|
||||
- "sidecar"
|
||||
- "--log.level=debug"
|
||||
- "--cluster.peers=thanos-peers.default.svc:10900"
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 10902
|
||||
- name: grpc
|
||||
containerPort: 10901
|
||||
- name: cluster
|
||||
containerPort: 10900
|
||||
---
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: prometheus
|
||||
labels:
|
||||
app: prometheus
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: prometheus
|
||||
endpoints:
|
||||
- port: web
|
||||
interval: 30s
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: prometheus
|
||||
prometheus: self
|
||||
name: prometheus-self
|
||||
spec:
|
||||
type: NodePort
|
||||
ports:
|
||||
- name: web
|
||||
nodePort: 30900
|
||||
port: 9090
|
||||
protocol: TCP
|
||||
targetPort: web
|
||||
selector:
|
||||
prometheus: self
|
|
@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
|||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
alertmanager: 'v0.14.0',
|
||||
alertmanager: 'v0.15.0',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
|
|
|
@ -5,8 +5,8 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
|||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
kubeStateMetrics: 'v1.3.0',
|
||||
kubeRbacProxy: 'v0.3.0',
|
||||
kubeStateMetrics: 'v1.3.1',
|
||||
kubeRbacProxy: 'v0.3.1',
|
||||
addonResizer: '1.0',
|
||||
},
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
|||
|
||||
versions+:: {
|
||||
nodeExporter: 'v0.15.2',
|
||||
kubeRbacProxy: 'v0.3.0',
|
||||
kubeRbacProxy: 'v0.3.1',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
|
|
|
@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
|||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
prometheus: 'v2.2.1',
|
||||
prometheus: 'v2.3.1',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
|
|
|
@ -2672,6 +2672,77 @@ spec:
|
|||
phase:
|
||||
description: Phase represents the current phase of PersistentVolumeClaim.
|
||||
type: string
|
||||
thanos:
|
||||
description: ThanosSpec defines parameters for a Prometheus server within
|
||||
a Thanos deployment.
|
||||
properties:
|
||||
baseImage:
|
||||
description: Thanos base image if other than default.
|
||||
type: string
|
||||
gcs:
|
||||
description: ThanosGCSSpec defines parameters for use of Google
|
||||
Cloud Storage (GCS) with Thanos.
|
||||
properties:
|
||||
bucket:
|
||||
description: Google Cloud Storage bucket name for stored blocks.
|
||||
If empty it won't store any block inside Google Cloud Storage.
|
||||
type: string
|
||||
peers:
|
||||
description: Peers is a DNS name for Thanos to discover peers through.
|
||||
type: string
|
||||
s3:
|
||||
description: ThanosSpec defines parameters for of AWS Simple Storage
|
||||
Service (S3) with Thanos. (S3 compatible services apply as well)
|
||||
properties:
|
||||
accessKey:
|
||||
description: SecretKeySelector selects a key of a Secret.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or it's key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
bucket:
|
||||
description: S3-Compatible API bucket name for stored blocks.
|
||||
type: string
|
||||
endpoint:
|
||||
description: S3-Compatible API endpoint for stored blocks.
|
||||
type: string
|
||||
insecure:
|
||||
description: Whether to use an insecure connection with an S3-Compatible
|
||||
API.
|
||||
type: boolean
|
||||
secretKey:
|
||||
description: SecretKeySelector selects a key of a Secret.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or it's key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
signatureVersion2:
|
||||
description: Whether to use S3 Signature Version 2; otherwise
|
||||
Signature Version 4 will be used.
|
||||
type: boolean
|
||||
version:
|
||||
description: Version describes the version of Thanos to use.
|
||||
type: string
|
||||
tolerations:
|
||||
description: If specified, the pod's tolerations.
|
||||
items:
|
||||
|
|
|
@ -11,4 +11,4 @@ spec:
|
|||
beta.kubernetes.io/os: linux
|
||||
replicas: 3
|
||||
serviceAccountName: alertmanager-main
|
||||
version: v0.14.0
|
||||
version: v0.15.0
|
||||
|
|
|
@ -64,7 +64,7 @@ items:
|
|||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{node}}",
|
||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
||||
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
|
@ -150,7 +150,7 @@ items:
|
|||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{node}}",
|
||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
||||
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
|
@ -248,7 +248,7 @@ items:
|
|||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{node}}",
|
||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
||||
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
|
@ -334,7 +334,7 @@ items:
|
|||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{node}}",
|
||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
||||
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
|
@ -432,7 +432,7 @@ items:
|
|||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{node}}",
|
||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
||||
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
|
@ -518,7 +518,7 @@ items:
|
|||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{node}}",
|
||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
||||
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
|
@ -616,7 +616,7 @@ items:
|
|||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{node}}",
|
||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
||||
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
|
@ -702,7 +702,7 @@ items:
|
|||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{node}}",
|
||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
||||
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
|
@ -800,7 +800,7 @@ items:
|
|||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{node}}",
|
||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
||||
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
|
@ -909,6 +909,7 @@ items:
|
|||
},
|
||||
"timezone": "utc",
|
||||
"title": "K8s / USE Method / Cluster",
|
||||
"uid": "a6e7d1362e1ddbb79db21d5bb40d7137",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
|
@ -1851,6 +1852,7 @@ items:
|
|||
},
|
||||
"timezone": "utc",
|
||||
"title": "K8s / USE Method / Node",
|
||||
"uid": "4ac4f123aae0ff6dbaf4f4f66120033b",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
|
@ -2468,7 +2470,7 @@ items:
|
|||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "/dashboard/file/k8s-resources-namespace.json?var-datasource=$datasource&var-namespace=$__cell",
|
||||
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-namespace=$__cell",
|
||||
"pattern": "namespace",
|
||||
"thresholds": [
|
||||
|
||||
|
@ -2828,7 +2830,7 @@ items:
|
|||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "/dashboard/file/k8s-resources-namespace.json?var-datasource=$datasource&var-namespace=$__cell",
|
||||
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-namespace=$__cell",
|
||||
"pattern": "namespace",
|
||||
"thresholds": [
|
||||
|
||||
|
@ -3000,6 +3002,7 @@ items:
|
|||
},
|
||||
"timezone": "utc",
|
||||
"title": "K8s / Compute Resources / Cluster",
|
||||
"uid": "efa86fd1d0c121a26444b636a3f509a8",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
|
@ -3269,7 +3272,7 @@ items:
|
|||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "/dashboard/file/k8s-resources-pod.json?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
|
||||
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
"thresholds": [
|
||||
|
||||
|
@ -3629,7 +3632,7 @@ items:
|
|||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "/dashboard/file/k8s-resources-pod.json?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
|
||||
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
"thresholds": [
|
||||
|
||||
|
@ -3828,6 +3831,7 @@ items:
|
|||
},
|
||||
"timezone": "utc",
|
||||
"title": "K8s / Compute Resources / Namespace",
|
||||
"uid": "85a562078cdf77779eaa1add43ccec1e",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
|
@ -4683,6 +4687,7 @@ items:
|
|||
},
|
||||
"timezone": "utc",
|
||||
"title": "K8s / Compute Resources / Pod",
|
||||
"uid": "6581e46e4e5c7ba40a07646395ef7b23",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
|
@ -5609,6 +5614,7 @@ items:
|
|||
},
|
||||
"timezone": "browser",
|
||||
"title": "Nodes",
|
||||
"uid": "fa49a4706d07a042595b664c87fb33ea",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
|
@ -6098,6 +6104,7 @@ items:
|
|||
},
|
||||
"timezone": "browser",
|
||||
"title": "Pods",
|
||||
"uid": "ab4f13a9892a76a4d21ce8c2445bf4ea",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
|
@ -6950,6 +6957,7 @@ items:
|
|||
},
|
||||
"timezone": "browser",
|
||||
"title": "StatefulSets",
|
||||
"uid": "a31c1f46e6f727cb37c0d731a7245005",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
|
|
|
@ -19,7 +19,7 @@ spec:
|
|||
- args:
|
||||
- --secure-listen-address=:8443
|
||||
- --upstream=http://127.0.0.1:8081/
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
|
||||
name: kube-rbac-proxy-main
|
||||
ports:
|
||||
- containerPort: 8443
|
||||
|
@ -34,7 +34,7 @@ spec:
|
|||
- args:
|
||||
- --secure-listen-address=:9443
|
||||
- --upstream=http://127.0.0.1:8082/
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
|
||||
name: kube-rbac-proxy-self
|
||||
ports:
|
||||
- containerPort: 9443
|
||||
|
@ -51,7 +51,7 @@ spec:
|
|||
- --port=8081
|
||||
- --telemetry-host=127.0.0.1
|
||||
- --telemetry-port=8082
|
||||
image: quay.io/coreos/kube-state-metrics:v1.3.0
|
||||
image: quay.io/coreos/kube-state-metrics:v1.3.1
|
||||
name: kube-state-metrics
|
||||
resources:
|
||||
limits:
|
||||
|
|
|
@ -38,7 +38,7 @@ spec:
|
|||
- args:
|
||||
- --secure-listen-address=:9100
|
||||
- --upstream=http://127.0.0.1:9101/
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
|
||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
|
||||
name: kube-rbac-proxy
|
||||
ports:
|
||||
- containerPort: 9100
|
||||
|
|
|
@ -27,4 +27,4 @@ spec:
|
|||
matchExpressions:
|
||||
- key: k8s-app
|
||||
operator: Exists
|
||||
version: v2.2.1
|
||||
version: v2.3.1
|
||||
|
|
|
@ -202,21 +202,21 @@ spec:
|
|||
)
|
||||
record: node:node_memory_swap_io_bytes:sum_rate
|
||||
- expr: |
|
||||
avg(irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3)
|
||||
avg(irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3)
|
||||
record: :node_disk_utilisation:avg_irate
|
||||
- expr: |
|
||||
avg by (node) (
|
||||
irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3
|
||||
irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
record: node:node_disk_utilisation:avg_irate
|
||||
- expr: |
|
||||
avg(irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3)
|
||||
avg(irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3)
|
||||
record: :node_disk_saturation:avg_irate
|
||||
- expr: |
|
||||
avg by (node) (
|
||||
irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3
|
||||
irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
|
@ -268,6 +268,7 @@ spec:
|
|||
- alert: AlertmanagerDown
|
||||
annotations:
|
||||
message: Alertmanager has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-alertmanagerdown
|
||||
expr: |
|
||||
absent(up{job="alertmanager-main"} == 1)
|
||||
for: 15m
|
||||
|
@ -276,6 +277,7 @@ spec:
|
|||
- alert: KubeAPIDown
|
||||
annotations:
|
||||
message: KubeAPI has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
|
||||
expr: |
|
||||
absent(up{job="apiserver"} == 1)
|
||||
for: 15m
|
||||
|
@ -284,6 +286,7 @@ spec:
|
|||
- alert: KubeControllerManagerDown
|
||||
annotations:
|
||||
message: KubeControllerManager has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
|
||||
expr: |
|
||||
absent(up{job="kube-controller-manager"} == 1)
|
||||
for: 15m
|
||||
|
@ -292,6 +295,7 @@ spec:
|
|||
- alert: KubeSchedulerDown
|
||||
annotations:
|
||||
message: KubeScheduler has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
|
||||
expr: |
|
||||
absent(up{job="kube-scheduler"} == 1)
|
||||
for: 15m
|
||||
|
@ -300,6 +304,7 @@ spec:
|
|||
- alert: KubeStateMetricsDown
|
||||
annotations:
|
||||
message: KubeStateMetrics has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricsdown
|
||||
expr: |
|
||||
absent(up{job="kube-state-metrics"} == 1)
|
||||
for: 15m
|
||||
|
@ -308,6 +313,7 @@ spec:
|
|||
- alert: KubeletDown
|
||||
annotations:
|
||||
message: Kubelet has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
|
||||
expr: |
|
||||
absent(up{job="kubelet"} == 1)
|
||||
for: 15m
|
||||
|
@ -316,6 +322,7 @@ spec:
|
|||
- alert: NodeExporterDown
|
||||
annotations:
|
||||
message: NodeExporter has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeexporterdown
|
||||
expr: |
|
||||
absent(up{job="node-exporter"} == 1)
|
||||
for: 15m
|
||||
|
@ -324,6 +331,7 @@ spec:
|
|||
- alert: PrometheusDown
|
||||
annotations:
|
||||
message: Prometheus has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusdown
|
||||
expr: |
|
||||
absent(up{job="prometheus-k8s"} == 1)
|
||||
for: 15m
|
||||
|
@ -332,6 +340,7 @@ spec:
|
|||
- alert: PrometheusOperatorDown
|
||||
annotations:
|
||||
message: PrometheusOperator has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatordown
|
||||
expr: |
|
||||
absent(up{job="prometheus-operator"} == 1)
|
||||
for: 15m
|
||||
|
@ -343,6 +352,7 @@ spec:
|
|||
annotations:
|
||||
message: '{{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
|
||||
}}) is restarting {{ printf "%.2f" $value }} / second'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
|
||||
expr: |
|
||||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) > 0
|
||||
for: 1h
|
||||
|
@ -351,6 +361,7 @@ spec:
|
|||
- alert: KubePodNotReady
|
||||
annotations:
|
||||
message: '{{ $labels.namespace }}/{{ $labels.pod }} is not ready.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
|
||||
expr: |
|
||||
sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase!~"Running|Succeeded"}) > 0
|
||||
for: 1h
|
||||
|
@ -360,6 +371,7 @@ spec:
|
|||
annotations:
|
||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} generation
|
||||
mismatch
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
|
||||
expr: |
|
||||
kube_deployment_status_observed_generation{job="kube-state-metrics"}
|
||||
!=
|
||||
|
@ -371,6 +383,7 @@ spec:
|
|||
annotations:
|
||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica
|
||||
mismatch
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
|
||||
expr: |
|
||||
kube_deployment_spec_replicas{job="kube-state-metrics"}
|
||||
!=
|
||||
|
@ -382,6 +395,7 @@ spec:
|
|||
annotations:
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} replica
|
||||
mismatch
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
|
||||
expr: |
|
||||
kube_statefulset_status_replicas_ready{job="kube-state-metrics"}
|
||||
!=
|
||||
|
@ -393,6 +407,7 @@ spec:
|
|||
annotations:
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} generation
|
||||
mismatch
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
|
||||
expr: |
|
||||
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
|
||||
!=
|
||||
|
@ -404,6 +419,7 @@ spec:
|
|||
annotations:
|
||||
message: Only {{$value}}% of desired pods scheduled and ready for daemon set
|
||||
{{$labels.namespace}}/{{$labels.daemonset}}
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
|
||||
expr: |
|
||||
kube_daemonset_status_number_ready{job="kube-state-metrics"}
|
||||
/
|
||||
|
@ -415,6 +431,7 @@ spec:
|
|||
annotations:
|
||||
message: A number of pods of daemonset {{$labels.namespace}}/{{$labels.daemonset}}
|
||||
are not scheduled.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
|
||||
expr: |
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
|
||||
-
|
||||
|
@ -426,17 +443,48 @@ spec:
|
|||
annotations:
|
||||
message: A number of pods of daemonset {{$labels.namespace}}/{{$labels.daemonset}}
|
||||
are running where they are not supposed to run.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
|
||||
expr: |
|
||||
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeCronJobRunning
|
||||
annotations:
|
||||
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking
|
||||
more than 1h to complete.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
|
||||
expr: |
|
||||
time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeJobCompletion
|
||||
annotations:
|
||||
message: Job {{ $labels.namespace }}/{{ $labels.job }} is taking more than
|
||||
1h to complete.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
|
||||
expr: |
|
||||
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeJobFailed
|
||||
annotations:
|
||||
message: Job {{ $labels.namespace }}/{{ $labels.job }} failed to complete.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
|
||||
expr: |
|
||||
kube_job_status_failed{job="kube-state-metrics"} > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- name: kubernetes-resources
|
||||
rules:
|
||||
- alert: KubeCPUOvercommit
|
||||
annotations:
|
||||
message: Overcommitted CPU resource requests on Pods, cannot tolerate node
|
||||
failure.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
|
||||
expr: |
|
||||
sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
|
||||
/
|
||||
|
@ -450,6 +498,7 @@ spec:
|
|||
annotations:
|
||||
message: Overcommitted Memory resource requests on Pods, cannot tolerate node
|
||||
failure.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
|
||||
expr: |
|
||||
sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
|
||||
/
|
||||
|
@ -464,6 +513,7 @@ spec:
|
|||
- alert: KubeCPUOvercommit
|
||||
annotations:
|
||||
message: Overcommitted CPU resource request quota on Namespaces.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
|
||||
expr: |
|
||||
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.cpu"})
|
||||
/
|
||||
|
@ -475,6 +525,7 @@ spec:
|
|||
- alert: KubeMemOvercommit
|
||||
annotations:
|
||||
message: Overcommitted Memory resource request quota on Namespaces.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
|
||||
expr: |
|
||||
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"})
|
||||
/
|
||||
|
@ -487,6 +538,7 @@ spec:
|
|||
annotations:
|
||||
message: '{{ printf "%0.0f" $value }}% usage of {{ $labels.resource }} in
|
||||
namespace {{ $labels.namespace }}.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
|
||||
expr: |
|
||||
100 * kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
/ ignoring(instance, job, type)
|
||||
|
@ -502,6 +554,7 @@ spec:
|
|||
message: The persistent volume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in namespace {{ $labels.namespace }} has {{ printf "%0.0f" $value }}%
|
||||
free.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical
|
||||
expr: |
|
||||
100 * kubelet_volume_stats_available_bytes{job="kubelet"}
|
||||
/
|
||||
|
@ -515,6 +568,7 @@ spec:
|
|||
message: Based on recent sampling, the persistent volume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in namespace {{ $labels.namespace }} is expected to fill up within four
|
||||
days.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays
|
||||
expr: |
|
||||
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[1h], 4 * 24 * 3600) < 0
|
||||
for: 5m
|
||||
|
@ -525,6 +579,7 @@ spec:
|
|||
- alert: KubeNodeNotReady
|
||||
annotations:
|
||||
message: '{{ $labels.node }} has been unready for more than an hour'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
|
||||
expr: |
|
||||
kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
|
||||
for: 1h
|
||||
|
@ -534,6 +589,7 @@ spec:
|
|||
annotations:
|
||||
message: There are {{ $value }} different versions of Kubernetes components
|
||||
running.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
|
||||
expr: |
|
||||
count(count(kubernetes_build_info{job!="kube-dns"}) by (gitVersion)) > 1
|
||||
for: 1h
|
||||
|
@ -543,6 +599,7 @@ spec:
|
|||
annotations:
|
||||
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
||||
}}' is experiencing {{ printf "%0.0f" $value }}% errors.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
|
||||
expr: |
|
||||
sum(rate(rest_client_requests_total{code!~"2.."}[5m])) by (instance, job) * 100
|
||||
/
|
||||
|
@ -555,6 +612,7 @@ spec:
|
|||
annotations:
|
||||
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
||||
}}' is experiencing {{ printf "%0.0f" $value }} errors / sec.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
|
||||
expr: |
|
||||
sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
|
||||
for: 15m
|
||||
|
@ -564,6 +622,7 @@ spec:
|
|||
annotations:
|
||||
message: Kubelet {{$labels.instance}} is running {{$value}} pods, close to
|
||||
the limit of 110.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
|
||||
expr: |
|
||||
kubelet_running_pod_count{job="kubelet"} > 100
|
||||
for: 15m
|
||||
|
@ -573,6 +632,7 @@ spec:
|
|||
annotations:
|
||||
message: The API server has a 99th percentile latency of {{ $value }} seconds
|
||||
for {{$labels.verb}} {{$labels.resource}}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
||||
expr: |
|
||||
cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
|
||||
for: 10m
|
||||
|
@ -582,6 +642,7 @@ spec:
|
|||
annotations:
|
||||
message: The API server has a 99th percentile latency of {{ $value }} seconds
|
||||
for {{$labels.verb}} {{$labels.resource}}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
||||
expr: |
|
||||
cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
|
||||
for: 10m
|
||||
|
@ -590,6 +651,7 @@ spec:
|
|||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is erroring for {{ $value }}% of requests.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
|
||||
/
|
||||
|
@ -600,6 +662,7 @@ spec:
|
|||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is erroring for {{ $value }}% of requests.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
|
||||
/
|
||||
|
@ -610,6 +673,7 @@ spec:
|
|||
- alert: KubeClientCertificateExpiration
|
||||
annotations:
|
||||
message: Kubernetes API certificate is expiring in less than 7 days.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
expr: |
|
||||
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
|
||||
labels:
|
||||
|
@ -617,6 +681,7 @@ spec:
|
|||
- alert: KubeClientCertificateExpiration
|
||||
annotations:
|
||||
message: Kubernetes API certificate is expiring in less than 1 day.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
expr: |
|
||||
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
|
||||
labels:
|
||||
|
|
|
@ -1802,6 +1802,42 @@ spec:
|
|||
proxyUrl:
|
||||
description: Optional ProxyURL
|
||||
type: string
|
||||
queueConfig:
|
||||
description: QueueConfig allows the tuning of remote_write queue_config
|
||||
parameters. This object is referenced in the RemoteWriteSpec
|
||||
object.
|
||||
properties:
|
||||
batchSendDeadline:
|
||||
description: BatchSendDeadline is the maximum time a sample
|
||||
will wait in buffer.
|
||||
type: string
|
||||
capacity:
|
||||
description: Capacity is the number of samples to buffer per
|
||||
shard before we start dropping them.
|
||||
format: int32
|
||||
type: integer
|
||||
maxBackoff:
|
||||
description: MaxBackoff is the maximum retry delay.
|
||||
type: string
|
||||
maxRetries:
|
||||
description: MaxRetries is the maximum number of times to
|
||||
retry a batch on recoverable errors.
|
||||
format: int32
|
||||
type: integer
|
||||
maxSamplesPerSend:
|
||||
description: MaxSamplesPerSend is the maximum number of samples
|
||||
per send.
|
||||
format: int32
|
||||
type: integer
|
||||
maxShards:
|
||||
description: MaxShards is the maximum number of shards, i.e.
|
||||
amount of concurrency.
|
||||
format: int32
|
||||
type: integer
|
||||
minBackoff:
|
||||
description: MinBackoff is the initial retry delay. Gets doubled
|
||||
for every retry.
|
||||
type: string
|
||||
remoteTimeout:
|
||||
description: Timeout for requests to the remote write endpoint.
|
||||
type: string
|
||||
|
@ -2673,6 +2709,77 @@ spec:
|
|||
phase:
|
||||
description: Phase represents the current phase of PersistentVolumeClaim.
|
||||
type: string
|
||||
thanos:
|
||||
description: ThanosSpec defines parameters for a Prometheus server within
|
||||
a Thanos deployment.
|
||||
properties:
|
||||
baseImage:
|
||||
description: Thanos base image if other than default.
|
||||
type: string
|
||||
gcs:
|
||||
description: ThanosGCSSpec defines parameters for use of Google
|
||||
Cloud Storage (GCS) with Thanos.
|
||||
properties:
|
||||
bucket:
|
||||
description: Google Cloud Storage bucket name for stored blocks.
|
||||
If empty it won't store any block inside Google Cloud Storage.
|
||||
type: string
|
||||
peers:
|
||||
description: Peers is a DNS name for Thanos to discover peers through.
|
||||
type: string
|
||||
s3:
|
||||
description: ThanosS3Spec defines parameters for use of AWS Simple Storage
|
||||
Service (S3) with Thanos. (S3 compatible services apply as well)
|
||||
properties:
|
||||
accessKey:
|
||||
description: SecretKeySelector selects a key of a Secret.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or it's key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
bucket:
|
||||
description: S3-Compatible API bucket name for stored blocks.
|
||||
type: string
|
||||
endpoint:
|
||||
description: S3-Compatible API endpoint for stored blocks.
|
||||
type: string
|
||||
insecure:
|
||||
description: Whether to use an insecure connection with an S3-Compatible
|
||||
API.
|
||||
type: boolean
|
||||
secretKey:
|
||||
description: SecretKeySelector selects a key of a Secret.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or it's key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
signatureVersion2:
|
||||
description: Whether to use S3 Signature Version 2; otherwise
|
||||
Signature Version 4 will be used.
|
||||
type: boolean
|
||||
version:
|
||||
description: Version describes the version of Thanos to use.
|
||||
type: string
|
||||
tolerations:
|
||||
description: If specified, the pod's tolerations.
|
||||
items:
|
||||
|
|
13
example/thanos/prometheus-role-binding.yaml
Normal file
13
example/thanos/prometheus-role-binding.yaml
Normal file
|
@ -0,0 +1,13 @@
|
|||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: prometheus-self
|
||||
namespace: default
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: prometheus-self
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: default
|
||||
namespace: default
|
17
example/thanos/prometheus-role.yaml
Normal file
17
example/thanos/prometheus-role.yaml
Normal file
|
@ -0,0 +1,17 @@
|
|||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: prometheus-self
|
||||
namespace: default
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
15
example/thanos/prometheus-service.yaml
Normal file
15
example/thanos/prometheus-service.yaml
Normal file
|
@ -0,0 +1,15 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: prometheus
|
||||
prometheus: self
|
||||
name: prometheus-self
|
||||
spec:
|
||||
ports:
|
||||
- name: web
|
||||
port: 9090
|
||||
protocol: TCP
|
||||
targetPort: web
|
||||
selector:
|
||||
prometheus: self
|
13
example/thanos/prometheus-servicemonitor.yaml
Normal file
13
example/thanos/prometheus-servicemonitor.yaml
Normal file
|
@ -0,0 +1,13 @@
|
|||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: prometheus
|
||||
labels:
|
||||
app: prometheus
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: prometheus
|
||||
endpoints:
|
||||
- port: web
|
||||
interval: 30s
|
20
example/thanos/prometheus.yaml
Normal file
20
example/thanos/prometheus.yaml
Normal file
|
@ -0,0 +1,20 @@
|
|||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: Prometheus
|
||||
metadata:
|
||||
name: self
|
||||
labels:
|
||||
prometheus: self
|
||||
spec:
|
||||
podMetadata:
|
||||
labels:
|
||||
thanos-peer: 'true'
|
||||
replicas: 2
|
||||
serviceMonitorSelector:
|
||||
matchLabels:
|
||||
app: prometheus
|
||||
ruleSelector:
|
||||
matchLabels:
|
||||
role: prometheus-rulefiles
|
||||
prometheus: k8s
|
||||
thanos:
|
||||
peers: thanos-peers.default.svc:10900
|
|
@ -6,7 +6,7 @@ metadata:
|
|||
app: thanos-query
|
||||
thanos-peer: "true"
|
||||
spec:
|
||||
replicas: 2
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: thanos-query
|
||||
|
@ -19,7 +19,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: thanos-query
|
||||
image: improbable/thanos:latest
|
||||
image: improbable/thanos:v0.1.0-rc.1
|
||||
args:
|
||||
- "query"
|
||||
- "--log.level=debug"
|
||||
|
@ -31,21 +31,4 @@ spec:
|
|||
- name: grpc
|
||||
containerPort: 10901
|
||||
- name: cluster
|
||||
containerPort: 10900
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-query
|
||||
name: thanos-query
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: thanos-query
|
||||
ports:
|
||||
- port: 9090
|
||||
protocol: TCP
|
||||
targetPort: http
|
||||
name: http-query
|
||||
nodePort: 31111
|
||||
containerPort: 10900
|
14
example/thanos/querier-service.yaml
Normal file
14
example/thanos/querier-service.yaml
Normal file
|
@ -0,0 +1,14 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: thanos-query
|
||||
name: thanos-query
|
||||
spec:
|
||||
selector:
|
||||
app: thanos-query
|
||||
ports:
|
||||
- port: 9090
|
||||
protocol: TCP
|
||||
targetPort: http
|
||||
name: http-query
|
|
@ -10,5 +10,4 @@ spec:
|
|||
port: 10900
|
||||
targetPort: cluster
|
||||
selector:
|
||||
# Useful endpoint for gathering all thanos components for common gossip cluster.
|
||||
thanos-peer: "true"
|
File diff suppressed because one or more lines are too long
|
@ -33,7 +33,7 @@ import (
|
|||
|
||||
const (
|
||||
governingServiceName = "alertmanager-operated"
|
||||
defaultVersion = "v0.14.0"
|
||||
defaultVersion = "v0.15.0"
|
||||
secretsDir = "/etc/alertmanager/secrets/"
|
||||
alertmanagerConfDir = "/etc/alertmanager/config"
|
||||
alertmanagerConfFile = alertmanagerConfDir + "/alertmanager.yaml"
|
||||
|
|
|
@ -191,6 +191,7 @@ func UnstructuredFromAlertmanager(a *Alertmanager) (*unstructured.Unstructured,
|
|||
// necessary anymore.
|
||||
unstructured.RemoveNestedField(r.Object, "metadata", "creationTimestamp")
|
||||
unstructured.RemoveNestedField(r.Object, "spec", "storage", "volumeClaimTemplate", "metadata", "creationTimestamp")
|
||||
unstructured.RemoveNestedField(r.Object, "spec", "podMetadata", "creationTimestamp")
|
||||
return &r, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -1043,11 +1043,17 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
|
|||
Ref: ref("k8s.io/api/core/v1.SecretKeySelector"),
|
||||
},
|
||||
},
|
||||
"thanos": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment.\n\nThis section is experimental, it may change significantly without deprecation notice in any release.\n\nThis is experimental and may change significantly without backward compatibility in any release.",
|
||||
Ref: ref("github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosSpec"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.AlertingSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RemoteReadSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RemoteWriteSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.StorageSpec", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/api/core/v1.SecretKeySelector", "k8s.io/api/core/v1.Toleration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"},
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.AlertingSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RemoteReadSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RemoteWriteSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.StorageSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosSpec", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/api/core/v1.SecretKeySelector", "k8s.io/api/core/v1.Toleration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "k8s.io/apimachinery/pkg/apis/meta/v1.ObjectMeta"},
|
||||
},
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.PrometheusStatus": {
|
||||
Schema: spec.Schema{
|
||||
|
@ -1095,6 +1101,65 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
|
|||
},
|
||||
Dependencies: []string{},
|
||||
},
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.QueueConfig": {
|
||||
Schema: spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "QueueConfig allows the tuning of remote_write queue_config parameters. This object is referenced in the RemoteWriteSpec object.",
|
||||
Properties: map[string]spec.Schema{
|
||||
"capacity": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Capacity is the number of samples to buffer per shard before we start dropping them.",
|
||||
Type: []string{"integer"},
|
||||
Format: "int32",
|
||||
},
|
||||
},
|
||||
"maxShards": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "MaxShards is the maximum number of shards, i.e. amount of concurrency.",
|
||||
Type: []string{"integer"},
|
||||
Format: "int32",
|
||||
},
|
||||
},
|
||||
"maxSamplesPerSend": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "MaxSamplesPerSend is the maximum number of samples per send.",
|
||||
Type: []string{"integer"},
|
||||
Format: "int32",
|
||||
},
|
||||
},
|
||||
"batchSendDeadline": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "BatchSendDeadline is the maximum time a sample will wait in buffer.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"maxRetries": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "MaxRetries is the maximum number of times to retry a batch on recoverable errors.",
|
||||
Type: []string{"integer"},
|
||||
Format: "int32",
|
||||
},
|
||||
},
|
||||
"minBackoff": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "MinBackoff is the initial retry delay. Gets doubled for every retry.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"maxBackoff": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "MaxBackoff is the maximum retry delay.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{},
|
||||
},
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RelabelConfig": {
|
||||
Schema: spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
|
@ -1306,12 +1371,18 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
|
|||
Format: "",
|
||||
},
|
||||
},
|
||||
"queueConfig": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "QueueConfig allows tuning of the remote write queue parameters.",
|
||||
Ref: ref("github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.QueueConfig"),
|
||||
},
|
||||
},
|
||||
},
|
||||
Required: []string{"url"},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.BasicAuth", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RelabelConfig", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.TLSConfig"},
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.BasicAuth", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.QueueConfig", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.RelabelConfig", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.TLSConfig"},
|
||||
},
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.Rule": {
|
||||
Schema: spec.Schema{
|
||||
|
@ -1632,6 +1703,118 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
|
|||
},
|
||||
Dependencies: []string{},
|
||||
},
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosGCSSpec": {
|
||||
Schema: spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "ThanosGCSSpec defines parameters for use of Google Cloud Storage (GCS) with Thanos.",
|
||||
Properties: map[string]spec.Schema{
|
||||
"bucket": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Google Cloud Storage bucket name for stored blocks. If empty it won't store any block inside Google Cloud Storage.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{},
|
||||
},
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosS3Spec": {
|
||||
Schema: spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "ThanosS3Spec defines parameters for use of AWS Simple Storage Service (S3) with Thanos. (S3-compatible services apply as well.)",
|
||||
Properties: map[string]spec.Schema{
|
||||
"bucket": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "S3-Compatible API bucket name for stored blocks.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"endpoint": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "S3-Compatible API endpoint for stored blocks.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"accessKey": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "AccessKey for an S3-Compatible API.",
|
||||
Ref: ref("k8s.io/api/core/v1.SecretKeySelector"),
|
||||
},
|
||||
},
|
||||
"secretKey": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "SecretKey for an S3-Compatible API.",
|
||||
Ref: ref("k8s.io/api/core/v1.SecretKeySelector"),
|
||||
},
|
||||
},
|
||||
"insecure": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Whether to use an insecure connection with an S3-Compatible API.",
|
||||
Type: []string{"boolean"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"signatureVersion2": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Whether to use S3 Signature Version 2; otherwise Signature Version 4 will be used.",
|
||||
Type: []string{"boolean"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{
|
||||
"k8s.io/api/core/v1.SecretKeySelector"},
|
||||
},
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosSpec": {
|
||||
Schema: spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "ThanosSpec defines parameters for a Prometheus server within a Thanos deployment.",
|
||||
Properties: map[string]spec.Schema{
|
||||
"peers": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Peers is a DNS name for Thanos to discover peers through.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"version": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Version describes the version of Thanos to use.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"baseImage": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "Thanos base image if other than default.",
|
||||
Type: []string{"string"},
|
||||
Format: "",
|
||||
},
|
||||
},
|
||||
"gcs": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "GCS configures use of GCS in Thanos.",
|
||||
Ref: ref("github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosGCSSpec"),
|
||||
},
|
||||
},
|
||||
"s3": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "S3 configures use of S3 in Thanos.",
|
||||
Ref: ref("github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosS3Spec"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Dependencies: []string{
|
||||
"github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosGCSSpec", "github.com/coreos/prometheus-operator/pkg/client/monitoring/v1.ThanosS3Spec"},
|
||||
},
|
||||
"k8s.io/api/core/v1.AWSElasticBlockStoreVolumeSource": {
|
||||
Schema: spec.Schema{
|
||||
SchemaProps: spec.SchemaProps{
|
||||
|
|
|
@ -190,6 +190,7 @@ func UnstructuredFromPrometheus(p *Prometheus) (*unstructured.Unstructured, erro
|
|||
// necessary anymore.
|
||||
unstructured.RemoveNestedField(r.Object, "metadata", "creationTimestamp")
|
||||
unstructured.RemoveNestedField(r.Object, "spec", "storage", "volumeClaimTemplate", "metadata", "creationTimestamp")
|
||||
unstructured.RemoveNestedField(r.Object, "spec", "podMetadata", "creationTimestamp")
|
||||
return &r, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -163,6 +163,15 @@ type PrometheusSpec struct {
|
|||
// notes to ensure that no incompatible AlertManager configs are going to break
|
||||
// Prometheus after the upgrade.
|
||||
AdditionalAlertManagerConfigs *v1.SecretKeySelector `json:"additionalAlertManagerConfigs,omitempty"`
|
||||
// Thanos configuration allows configuring various aspects of a Prometheus
|
||||
// server in a Thanos environment.
|
||||
//
|
||||
// This section is experimental, it may change significantly without
|
||||
// deprecation notice in any release.
|
||||
//
|
||||
// This is experimental and may change significantly without backward
|
||||
// compatibility in any release.
|
||||
Thanos *ThanosSpec `json:"thanos,omitempty"`
|
||||
}
|
||||
|
||||
// Most recent observed status of the Prometheus cluster. Read-only. Not
|
||||
|
@ -215,6 +224,48 @@ type StorageSpec struct {
|
|||
VolumeClaimTemplate v1.PersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"`
|
||||
}
|
||||
|
||||
// ThanosSpec defines parameters for a Prometheus server within a Thanos deployment.
|
||||
// +k8s:openapi-gen=true
|
||||
type ThanosSpec struct {
|
||||
// Peers is a DNS name for Thanos to discover peers through.
|
||||
Peers *string `json:"peers,omitempty"`
|
||||
// Version describes the version of Thanos to use.
|
||||
Version *string `json:"version,omitempty"`
|
||||
// Thanos base image if other than default.
|
||||
BaseImage *string `json:"baseImage,omitempty"`
|
||||
// GCS configures use of GCS in Thanos.
|
||||
GCS *ThanosGCSSpec `json:"gcs,omitempty"`
|
||||
// S3 configures use of S3 in Thanos.
|
||||
S3 *ThanosS3Spec `json:"s3,omitempty"`
|
||||
}
|
||||
|
||||
// ThanosGCSSpec defines parameters for use of Google Cloud Storage (GCS) with
|
||||
// Thanos.
|
||||
// +k8s:openapi-gen=true
|
||||
type ThanosGCSSpec struct {
|
||||
// Google Cloud Storage bucket name for stored blocks. If empty it won't
|
||||
// store any block inside Google Cloud Storage.
|
||||
Bucket *string `json:"bucket,omitempty"`
|
||||
}
|
||||
|
||||
// ThanosSpec defines parameters for of AWS Simple Storage Service (S3) with
|
||||
// Thanos. (S3 compatible services apply as well)
|
||||
// +k8s:openapi-gen=true
|
||||
type ThanosS3Spec struct {
|
||||
// S3-Compatible API bucket name for stored blocks.
|
||||
Bucket *string `json:"bucket,omitempty"`
|
||||
// S3-Compatible API endpoint for stored blocks.
|
||||
Endpoint *string `json:"endpoint,omitempty"`
|
||||
// AccessKey for an S3-Compatible API.
|
||||
AccessKey *v1.SecretKeySelector `json:"accessKey,omitempty"`
|
||||
// SecretKey for an S3-Compatible API.
|
||||
SecretKey *v1.SecretKeySelector `json:"secretKey,omitempty"`
|
||||
// Whether to use an insecure connection with an S3-Compatible API.
|
||||
Insecure *bool `json:"insecure,omitempty"`
|
||||
// Whether to use S3 Signature Version 2; otherwise Signature Version 4 will be used.
|
||||
SignatureVersion2 *bool `json:"signatureVersion2,omitempty"`
|
||||
}
|
||||
|
||||
// RemoteWriteSpec defines the remote_write configuration for prometheus.
|
||||
// +k8s:openapi-gen=true
|
||||
type RemoteWriteSpec struct {
|
||||
|
@ -234,6 +285,28 @@ type RemoteWriteSpec struct {
|
|||
TLSConfig *TLSConfig `json:"tlsConfig,omitempty"`
|
||||
//Optional ProxyURL
|
||||
ProxyURL string `json:"proxyUrl,omitempty"`
|
||||
// QueueConfig allows tuning of the remote write queue parameters.
|
||||
QueueConfig *QueueConfig `json:"queueConfig,omitempty"`
|
||||
}
|
||||
|
||||
// QueueConfig allows the tuning of remote_write queue_config parameters. This object
|
||||
// is referenced in the RemoteWriteSpec object.
|
||||
// +k8s:openapi-gen=true
|
||||
type QueueConfig struct {
|
||||
// Capacity is the number of samples to buffer per shard before we start dropping them.
|
||||
Capacity int `json:"capacity,omitempty"`
|
||||
// MaxShards is the maximum number of shards, i.e. amount of concurrency.
|
||||
MaxShards int `json:"maxShards,omitempty"`
|
||||
// MaxSamplesPerSend is the maximum number of samples per send.
|
||||
MaxSamplesPerSend int `json:"maxSamplesPerSend,omitempty"`
|
||||
// BatchSendDeadline is the maximum time a sample will wait in buffer.
|
||||
BatchSendDeadline string `json:"batchSendDeadline,omitempty"`
|
||||
// MaxRetries is the maximum number of times to retry a batch on recoverable errors.
|
||||
MaxRetries int `json:"maxRetries,omitempty"`
|
||||
// MinBackoff is the initial retry delay. Gets doubled for every retry.
|
||||
MinBackoff string `json:"minBackoff,omitempty"`
|
||||
// MaxBackoff is the maximum retry delay.
|
||||
MaxBackoff string `json:"maxBackoff,omitempty"`
|
||||
}
|
||||
|
||||
// RemoteReadSpec defines the remote_read configuration for prometheus.
|
||||
|
|
|
@ -671,6 +671,15 @@ func (in *PrometheusSpec) DeepCopyInto(out *PrometheusSpec) {
|
|||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
}
|
||||
if in.Thanos != nil {
|
||||
in, out := &in.Thanos, &out.Thanos
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(ThanosSpec)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -700,6 +709,22 @@ func (in *PrometheusStatus) DeepCopy() *PrometheusStatus {
|
|||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *QueueConfig) DeepCopyInto(out *QueueConfig) {
|
||||
*out = *in
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new QueueConfig.
|
||||
func (in *QueueConfig) DeepCopy() *QueueConfig {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(QueueConfig)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *RelabelConfig) DeepCopyInto(out *RelabelConfig) {
|
||||
*out = *in
|
||||
|
@ -790,6 +815,15 @@ func (in *RemoteWriteSpec) DeepCopyInto(out *RemoteWriteSpec) {
|
|||
**out = **in
|
||||
}
|
||||
}
|
||||
if in.QueueConfig != nil {
|
||||
in, out := &in.QueueConfig, &out.QueueConfig
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(QueueConfig)
|
||||
**out = **in
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -986,3 +1020,159 @@ func (in *TLSConfig) DeepCopy() *TLSConfig {
|
|||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *ThanosGCSSpec) DeepCopyInto(out *ThanosGCSSpec) {
|
||||
*out = *in
|
||||
if in.Bucket != nil {
|
||||
in, out := &in.Bucket, &out.Bucket
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(string)
|
||||
**out = **in
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ThanosGCSSpec.
|
||||
func (in *ThanosGCSSpec) DeepCopy() *ThanosGCSSpec {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ThanosGCSSpec)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *ThanosS3Spec) DeepCopyInto(out *ThanosS3Spec) {
|
||||
*out = *in
|
||||
if in.Bucket != nil {
|
||||
in, out := &in.Bucket, &out.Bucket
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(string)
|
||||
**out = **in
|
||||
}
|
||||
}
|
||||
if in.Endpoint != nil {
|
||||
in, out := &in.Endpoint, &out.Endpoint
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(string)
|
||||
**out = **in
|
||||
}
|
||||
}
|
||||
if in.AccessKey != nil {
|
||||
in, out := &in.AccessKey, &out.AccessKey
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(core_v1.SecretKeySelector)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
}
|
||||
if in.SecretKey != nil {
|
||||
in, out := &in.SecretKey, &out.SecretKey
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(core_v1.SecretKeySelector)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
}
|
||||
if in.Insecure != nil {
|
||||
in, out := &in.Insecure, &out.Insecure
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
}
|
||||
if in.SignatureVersion2 != nil {
|
||||
in, out := &in.SignatureVersion2, &out.SignatureVersion2
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ThanosS3Spec.
|
||||
func (in *ThanosS3Spec) DeepCopy() *ThanosS3Spec {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ThanosS3Spec)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *ThanosSpec) DeepCopyInto(out *ThanosSpec) {
|
||||
*out = *in
|
||||
if in.Peers != nil {
|
||||
in, out := &in.Peers, &out.Peers
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(string)
|
||||
**out = **in
|
||||
}
|
||||
}
|
||||
if in.Version != nil {
|
||||
in, out := &in.Version, &out.Version
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(string)
|
||||
**out = **in
|
||||
}
|
||||
}
|
||||
if in.BaseImage != nil {
|
||||
in, out := &in.BaseImage, &out.BaseImage
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(string)
|
||||
**out = **in
|
||||
}
|
||||
}
|
||||
if in.GCS != nil {
|
||||
in, out := &in.GCS, &out.GCS
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(ThanosGCSSpec)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
}
|
||||
if in.S3 != nil {
|
||||
in, out := &in.S3, &out.S3
|
||||
if *in == nil {
|
||||
*out = nil
|
||||
} else {
|
||||
*out = new(ThanosS3Spec)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ThanosSpec.
|
||||
func (in *ThanosSpec) DeepCopy() *ThanosSpec {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ThanosSpec)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
|
|
@ -128,6 +128,7 @@ type Config struct {
|
|||
PrometheusConfigReloader string
|
||||
AlertmanagerDefaultBaseImage string
|
||||
PrometheusDefaultBaseImage string
|
||||
ThanosDefaultBaseImage string
|
||||
Namespace string
|
||||
Labels Labels
|
||||
CrdGroup string
|
||||
|
|
|
@ -89,7 +89,7 @@ func buildExternalLabels(p *v1.Prometheus) yaml.MapSlice {
|
|||
func generateConfig(p *v1.Prometheus, mons map[string]*v1.ServiceMonitor, basicAuthSecrets map[string]BasicAuthCredentials, additionalScrapeConfigs []byte, additionalAlertManagerConfigs []byte) ([]byte, error) {
|
||||
versionStr := p.Spec.Version
|
||||
if versionStr == "" {
|
||||
versionStr = DefaultVersion
|
||||
versionStr = DefaultPrometheusVersion
|
||||
}
|
||||
|
||||
version, err := semver.Parse(strings.TrimLeft(versionStr, "v"))
|
||||
|
@ -149,7 +149,7 @@ func generateConfig(p *v1.Prometheus, mons map[string]*v1.ServiceMonitor, basicA
|
|||
var additionalScrapeConfigsYaml []yaml.MapSlice
|
||||
err = yaml.Unmarshal([]byte(additionalScrapeConfigs), &additionalScrapeConfigsYaml)
|
||||
if err != nil {
|
||||
errors.Wrap(err, "unmarshalling additional scrape configs failed")
|
||||
return nil, errors.Wrap(err, "unmarshalling additional scrape configs failed")
|
||||
}
|
||||
|
||||
cfg = append(cfg, yaml.MapItem{
|
||||
|
@ -160,7 +160,7 @@ func generateConfig(p *v1.Prometheus, mons map[string]*v1.ServiceMonitor, basicA
|
|||
var additionalAlertManagerConfigsYaml []yaml.MapSlice
|
||||
err = yaml.Unmarshal([]byte(additionalAlertManagerConfigs), &additionalAlertManagerConfigsYaml)
|
||||
if err != nil {
|
||||
errors.Wrap(err, "unmarshalling additional alert manager configs failed")
|
||||
return nil, errors.Wrap(err, "unmarshalling additional alert manager configs failed")
|
||||
}
|
||||
|
||||
alertmanagerConfigs = append(alertmanagerConfigs, additionalAlertManagerConfigsYaml...)
|
||||
|
@ -710,6 +710,40 @@ func generateRemoteWriteConfig(version semver.Version, specs []v1.RemoteWriteSpe
|
|||
cfg = append(cfg, yaml.MapItem{Key: "proxy_url", Value: spec.ProxyURL})
|
||||
}
|
||||
|
||||
if spec.QueueConfig != nil {
|
||||
queueConfig := yaml.MapSlice{}
|
||||
|
||||
if spec.QueueConfig.Capacity != int(0) {
|
||||
queueConfig = append(queueConfig, yaml.MapItem{Key: "capacity", Value: spec.QueueConfig.Capacity})
|
||||
}
|
||||
|
||||
if spec.QueueConfig.MaxShards != int(0) {
|
||||
queueConfig = append(queueConfig, yaml.MapItem{Key: "max_shards", Value: spec.QueueConfig.MaxShards})
|
||||
}
|
||||
|
||||
if spec.QueueConfig.MaxSamplesPerSend != int(0) {
|
||||
queueConfig = append(queueConfig, yaml.MapItem{Key: "max_samples_per_send", Value: spec.QueueConfig.MaxSamplesPerSend})
|
||||
}
|
||||
|
||||
if spec.QueueConfig.BatchSendDeadline != "" {
|
||||
queueConfig = append(queueConfig, yaml.MapItem{Key: "batch_send_deadline", Value: spec.QueueConfig.BatchSendDeadline})
|
||||
}
|
||||
|
||||
if spec.QueueConfig.MaxRetries != int(0) {
|
||||
queueConfig = append(queueConfig, yaml.MapItem{Key: "max_retries", Value: spec.QueueConfig.MaxRetries})
|
||||
}
|
||||
|
||||
if spec.QueueConfig.MinBackoff != "" {
|
||||
queueConfig = append(queueConfig, yaml.MapItem{Key: "min_backoff", Value: spec.QueueConfig.MinBackoff})
|
||||
}
|
||||
|
||||
if spec.QueueConfig.MaxBackoff != "" {
|
||||
queueConfig = append(queueConfig, yaml.MapItem{Key: "max_backoff", Value: spec.QueueConfig.MaxBackoff})
|
||||
}
|
||||
|
||||
cfg = append(cfg, yaml.MapItem{Key: "queue_config", Value: queueConfig})
|
||||
}
|
||||
|
||||
cfgs = append(cfgs, cfg)
|
||||
}
|
||||
|
||||
|
|
|
@ -32,17 +32,18 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
governingServiceName = "prometheus-operated"
|
||||
DefaultVersion = "v2.2.1"
|
||||
defaultRetention = "24h"
|
||||
storageDir = "/prometheus"
|
||||
confDir = "/etc/prometheus/config"
|
||||
confOutDir = "/etc/prometheus/config_out"
|
||||
rulesDir = "/etc/prometheus/rules"
|
||||
secretsDir = "/etc/prometheus/secrets/"
|
||||
configFilename = "prometheus.yaml"
|
||||
configEnvsubstFilename = "prometheus.env.yaml"
|
||||
sSetInputChecksumName = "prometheus-operator-input-checksum"
|
||||
governingServiceName = "prometheus-operated"
|
||||
DefaultPrometheusVersion = "v2.3.1"
|
||||
DefaultThanosVersion = "v0.1.0-rc.1"
|
||||
defaultRetention = "24h"
|
||||
storageDir = "/prometheus"
|
||||
confDir = "/etc/prometheus/config"
|
||||
confOutDir = "/etc/prometheus/config_out"
|
||||
rulesDir = "/etc/prometheus/rules"
|
||||
secretsDir = "/etc/prometheus/secrets/"
|
||||
configFilename = "prometheus.yaml"
|
||||
configEnvsubstFilename = "prometheus.env.yaml"
|
||||
sSetInputChecksumName = "prometheus-operator-input-checksum"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -88,7 +89,11 @@ func makeStatefulSet(
|
|||
p.Spec.BaseImage = config.PrometheusDefaultBaseImage
|
||||
}
|
||||
if p.Spec.Version == "" {
|
||||
p.Spec.Version = DefaultVersion
|
||||
p.Spec.Version = DefaultPrometheusVersion
|
||||
}
|
||||
if p.Spec.Thanos != nil && p.Spec.Thanos.Version == nil {
|
||||
v := DefaultThanosVersion
|
||||
p.Spec.Thanos.Version = &v
|
||||
}
|
||||
|
||||
versionStr := strings.TrimLeft(p.Spec.Version, "v")
|
||||
|
@ -542,6 +547,86 @@ func makeStatefulSetSpec(p monitoringv1.Prometheus, c *Config) (*appsv1.Stateful
|
|||
|
||||
finalLabels := c.Labels.Merge(podLabels)
|
||||
|
||||
additionalContainers := p.Spec.Containers
|
||||
|
||||
if p.Spec.Thanos != nil {
|
||||
thanosBaseImage := c.ThanosDefaultBaseImage
|
||||
if p.Spec.Thanos.BaseImage != nil {
|
||||
thanosBaseImage = *p.Spec.Thanos.BaseImage
|
||||
}
|
||||
|
||||
thanosArgs := []string{"sidecar"}
|
||||
|
||||
if p.Spec.Thanos.Peers != nil {
|
||||
thanosArgs = append(thanosArgs, fmt.Sprintf("--cluster.peers=%s", *p.Spec.Thanos.Peers))
|
||||
}
|
||||
if p.Spec.LogLevel != "" && p.Spec.LogLevel != "info" {
|
||||
thanosArgs = append(thanosArgs, fmt.Sprintf("--log.level=%s", p.Spec.LogLevel))
|
||||
}
|
||||
|
||||
if p.Spec.Thanos.GCS != nil {
|
||||
if p.Spec.Thanos.GCS.Bucket != nil {
|
||||
thanosArgs = append(thanosArgs, fmt.Sprintf("--gcs.bucket=%s", *p.Spec.Thanos.GCS.Bucket))
|
||||
}
|
||||
}
|
||||
|
||||
envVars := []v1.EnvVar{}
|
||||
if p.Spec.Thanos.S3 != nil {
|
||||
if p.Spec.Thanos.S3.Bucket != nil {
|
||||
thanosArgs = append(thanosArgs, fmt.Sprintf("--s3.bucket=%s", *p.Spec.Thanos.S3.Bucket))
|
||||
}
|
||||
if p.Spec.Thanos.S3.Endpoint != nil {
|
||||
thanosArgs = append(thanosArgs, fmt.Sprintf("--s3.endpoint=%s", *p.Spec.Thanos.S3.Endpoint))
|
||||
}
|
||||
if p.Spec.Thanos.S3.Insecure != nil && *p.Spec.Thanos.S3.Insecure {
|
||||
thanosArgs = append(thanosArgs, "--s3.insecure")
|
||||
}
|
||||
if p.Spec.Thanos.S3.SignatureVersion2 != nil && *p.Spec.Thanos.S3.SignatureVersion2 {
|
||||
thanosArgs = append(thanosArgs, "--s3.signature-version2")
|
||||
}
|
||||
if p.Spec.Thanos.S3.AccessKey != nil {
|
||||
envVars = append(envVars, v1.EnvVar{
|
||||
Name: "S3_ACCESS_KEY",
|
||||
ValueFrom: &v1.EnvVarSource{
|
||||
SecretKeyRef: p.Spec.Thanos.S3.AccessKey,
|
||||
},
|
||||
})
|
||||
}
|
||||
if p.Spec.Thanos.S3.SecretKey != nil {
|
||||
envVars = append(envVars, v1.EnvVar{
|
||||
Name: "S3_SECRET_KEY",
|
||||
ValueFrom: &v1.EnvVarSource{
|
||||
SecretKeyRef: p.Spec.Thanos.S3.SecretKey,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
c := v1.Container{
|
||||
Name: "thanos-sidecar",
|
||||
Image: thanosBaseImage + ":" + *p.Spec.Thanos.Version,
|
||||
Args: thanosArgs,
|
||||
Ports: []v1.ContainerPort{
|
||||
{
|
||||
Name: "http",
|
||||
ContainerPort: 10902,
|
||||
},
|
||||
{
|
||||
Name: "grpc",
|
||||
ContainerPort: 10901,
|
||||
},
|
||||
{
|
||||
Name: "cluster",
|
||||
ContainerPort: 10900,
|
||||
},
|
||||
},
|
||||
Env: envVars,
|
||||
}
|
||||
|
||||
additionalContainers = append(additionalContainers, c)
|
||||
promArgs = append(promArgs, "--storage.tsdb.min-block-duration=2h", "--storage.tsdb.max-block-duration=2h")
|
||||
}
|
||||
|
||||
return &appsv1.StatefulSetSpec{
|
||||
ServiceName: governingServiceName,
|
||||
Replicas: p.Spec.Replicas,
|
||||
|
@ -610,7 +695,7 @@ func makeStatefulSetSpec(p monitoringv1.Prometheus, c *Config) (*appsv1.Stateful
|
|||
},
|
||||
},
|
||||
},
|
||||
}, p.Spec.Containers...),
|
||||
}, additionalContainers...),
|
||||
SecurityContext: securityContext,
|
||||
ServiceAccountName: p.Spec.ServiceAccountName,
|
||||
NodeSelector: p.Spec.NodeSelector,
|
||||
|
|
|
@ -205,7 +205,7 @@ func TestStatefulSetVolumeInitial(t *testing.T) {
|
|||
VolumeSource: v1.VolumeSource{
|
||||
ConfigMap: &v1.ConfigMapVolumeSource{
|
||||
LocalObjectReference: v1.LocalObjectReference{
|
||||
Name: "prometheus-volume-init-test-rules",
|
||||
Name: "prometheus-volume-init-test-rulefiles",
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
|
@ -15,40 +15,19 @@ if [[ "${TRAVIS_PULL_REQUEST}" != "false" ]]; then
|
|||
exit 0
|
||||
fi
|
||||
|
||||
# Builds both prometheus-operator and prometheus-config-reloader
|
||||
make crossbuild
|
||||
|
||||
|
||||
#
|
||||
# prometheus-operator
|
||||
#
|
||||
|
||||
export REPO=quay.io/coreos/prometheus-operator
|
||||
# Push to Quay '-dev' repo if not a git tag or master branch build
|
||||
export REPO="quay.io/coreos/prometheus-operator"
|
||||
export REPO_PROMETHEUS_CONFIG_RELOADER="quay.io/coreos/prometheus-config-reloader"
|
||||
if [[ "${TRAVIS_TAG}" == "" ]] && [[ "${TRAVIS_BRANCH}" != master ]]; then
|
||||
export REPO="${REPO}-dev"
|
||||
export REPO="quay.io/coreos/prometheus-operator-dev"
|
||||
export REPO_PROMETHEUS_CONFIG_RELOADER="quay.io/coreos/prometheus-config-reloader-dev"
|
||||
fi
|
||||
|
||||
# For both git tags and git branches 'TRAVIS_BRANCH' contains the name.
|
||||
export TAG="${TRAVIS_BRANCH}"
|
||||
make container
|
||||
|
||||
make image
|
||||
|
||||
echo "${QUAY_PASSWORD}" | docker login -u "${QUAY_USERNAME}" --password-stdin quay.io
|
||||
docker push "${REPO}:${TRAVIS_BRANCH}"
|
||||
|
||||
|
||||
#
|
||||
# prometheus-config-reloader
|
||||
#
|
||||
|
||||
cd contrib/prometheus-config-reloader
|
||||
export REPO=quay.io/coreos/prometheus-config-reloader
|
||||
# Push to Quay '-dev' repo if not a git tag or master branch build
|
||||
if [[ "${TRAVIS_TAG}" == "" ]] && [[ "${TRAVIS_BRANCH}" != master ]]; then
|
||||
export REPO="${REPO}-dev"
|
||||
fi
|
||||
|
||||
# For both git tags and git branches 'TRAVIS_BRANCH' contains the name.
|
||||
export TAG="${TRAVIS_BRANCH}"
|
||||
make container
|
||||
echo "${QUAY_PASSWORD}" | docker login -u "${QUAY_USERNAME}" --password-stdin quay.io
|
||||
docker push "${REPO}:${TRAVIS_BRANCH}"
|
||||
docker push "${REPO}:${TAG}"
|
||||
docker push "${REPO_PROMETHEUS_CONFIG_RELOADER}:${TAG}"
|
||||
|
|
|
@ -1070,6 +1070,107 @@ func TestPromOpMatchPromAndServMonInDiffNSs(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestThanos(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := framework.NewTestCtx(t)
|
||||
defer ctx.Cleanup(t)
|
||||
ns := ctx.CreateNamespace(t, framework.KubeClient)
|
||||
ctx.SetupPrometheusRBAC(t, ns, framework.KubeClient)
|
||||
|
||||
peerServiceName := "thanos-peers"
|
||||
querierServiceName := "thanos-querier"
|
||||
basicPrometheus := framework.MakeBasicPrometheus(ns, "basic-prometheus", "test-group", 1)
|
||||
peerServiceDNS := fmt.Sprintf("%s.%s.svc:10900", peerServiceName, ns)
|
||||
version := "v0.1.0-rc.1"
|
||||
basicPrometheus.Spec.Thanos = &monitoringv1.ThanosSpec{
|
||||
Peers: &peerServiceDNS,
|
||||
Version: &version,
|
||||
}
|
||||
basicPrometheus.Spec.PodMetadata = &metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
"thanos-peer": "true",
|
||||
},
|
||||
}
|
||||
replicas := int32(2)
|
||||
basicPrometheus.Spec.Replicas = &replicas
|
||||
pservice := framework.MakePrometheusService(basicPrometheus.Name, "test-group", v1.ServiceTypeClusterIP)
|
||||
tservice := framework.MakeThanosService(peerServiceName)
|
||||
qservice := framework.MakeThanosQuerierService(querierServiceName)
|
||||
s := framework.MakeBasicServiceMonitor("test-group")
|
||||
thanosQuerier, err := testFramework.MakeDeployment("../../example/thanos/querier-deployment.yaml")
|
||||
if err != nil {
|
||||
t.Fatal("Making deployment failed: ", err)
|
||||
}
|
||||
querierArgs := []string{
|
||||
"query",
|
||||
"--log.level=debug",
|
||||
"--query.replica-label=prometheus_replica",
|
||||
fmt.Sprintf("--cluster.peers=%s", peerServiceDNS),
|
||||
}
|
||||
log.Println("setting up querier with args: ", querierArgs)
|
||||
thanosQuerier.Spec.Template.Spec.Containers[0].Args = querierArgs
|
||||
|
||||
if err := testFramework.CreateDeployment(framework.KubeClient, ns, thanosQuerier); err != nil {
|
||||
t.Fatal("Creating Thanos querier failed: ", err)
|
||||
}
|
||||
|
||||
if _, err := testFramework.CreateServiceAndWaitUntilReady(framework.KubeClient, ns, qservice); err != nil {
|
||||
t.Fatal("Creating Thanos querier service failed: ", err)
|
||||
}
|
||||
|
||||
if _, err := framework.MonClientV1.ServiceMonitors(ns).Create(s); err != nil {
|
||||
t.Fatal("Creating ServiceMonitor failed: ", err)
|
||||
}
|
||||
|
||||
if _, err := framework.KubeClient.CoreV1().Services(ns).Create(pservice); err != nil {
|
||||
t.Fatal("Creating prometheus service failed: ", err)
|
||||
}
|
||||
|
||||
if _, err := framework.MonClientV1.Prometheuses(ns).Create(basicPrometheus); err != nil {
|
||||
t.Fatal("Creating prometheus failed: ", err)
|
||||
}
|
||||
|
||||
if _, err := framework.KubeClient.CoreV1().Services(ns).Create(tservice); err != nil {
|
||||
t.Fatal("Creating prometheus service failed: ", err)
|
||||
}
|
||||
|
||||
err = wait.Poll(5*time.Second, 5*time.Minute, func() (bool, error) {
|
||||
proxyGet := framework.KubeClient.CoreV1().Services(ns).ProxyGet
|
||||
request := proxyGet("http", querierServiceName, "http-query", "/api/v1/query", map[string]string{"query": "prometheus_build_info", "dedup": "false"})
|
||||
b, err := request.DoRaw()
|
||||
if err != nil {
|
||||
log.Println(fmt.Sprintf("Error performing request against Thanos querier: %v\n\nretrying...", err))
|
||||
return false, nil
|
||||
}
|
||||
|
||||
d := struct {
|
||||
Data struct {
|
||||
Result []map[string]interface{} `json:"result"`
|
||||
} `json:"data"`
|
||||
}{}
|
||||
|
||||
err = json.Unmarshal(b, &d)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
result := len(d.Data.Result)
|
||||
// We're expecting 4 results as we are requesting the
|
||||
// `prometheus_build_info` metric, which is collected for both
|
||||
// Prometheus replicas by both replicas.
|
||||
expected := 4
|
||||
if result != expected {
|
||||
log.Printf("Unexpected number of results from query. Got %d, expected %d. retrying...\n", result, expected)
|
||||
return false, nil
|
||||
}
|
||||
return true, nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal("Failed to get correct result from Thanos querier: ", err)
|
||||
}
|
||||
}
|
||||
|
||||
func isDiscoveryWorking(ns, svcName, prometheusName string) func() (bool, error) {
|
||||
return func() (bool, error) {
|
||||
pods, err := framework.KubeClient.CoreV1().Pods(ns).List(prometheus.ListOptions(prometheusName))
|
||||
|
|
|
@ -41,7 +41,7 @@ func (f *Framework) MakeBasicPrometheus(ns, name, group string, replicas int32)
|
|||
},
|
||||
Spec: monitoringv1.PrometheusSpec{
|
||||
Replicas: &replicas,
|
||||
Version: prometheus.DefaultVersion,
|
||||
Version: prometheus.DefaultPrometheusVersion,
|
||||
ServiceMonitorSelector: &metav1.LabelSelector{
|
||||
MatchLabels: map[string]string{
|
||||
"group": group,
|
||||
|
@ -147,6 +147,48 @@ func (f *Framework) MakePrometheusService(name, group string, serviceType v1.Ser
|
|||
return service
|
||||
}
|
||||
|
||||
func (f *Framework) MakeThanosQuerierService(name string) *v1.Service {
|
||||
service := &v1.Service{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
},
|
||||
Spec: v1.ServiceSpec{
|
||||
Ports: []v1.ServicePort{
|
||||
v1.ServicePort{
|
||||
Name: "http-query",
|
||||
Port: 10902,
|
||||
TargetPort: intstr.FromString("http"),
|
||||
},
|
||||
},
|
||||
Selector: map[string]string{
|
||||
"app": "thanos-query",
|
||||
},
|
||||
},
|
||||
}
|
||||
return service
|
||||
}
|
||||
|
||||
func (f *Framework) MakeThanosService(name string) *v1.Service {
|
||||
service := &v1.Service{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
},
|
||||
Spec: v1.ServiceSpec{
|
||||
Ports: []v1.ServicePort{
|
||||
v1.ServicePort{
|
||||
Name: "cluster",
|
||||
Port: 10900,
|
||||
TargetPort: intstr.FromString("cluster"),
|
||||
},
|
||||
},
|
||||
Selector: map[string]string{
|
||||
"thanos-peer": "true",
|
||||
},
|
||||
},
|
||||
}
|
||||
return service
|
||||
}
|
||||
|
||||
func (f *Framework) CreatePrometheusAndWaitUntilReady(ns string, p *monitoringv1.Prometheus) error {
|
||||
_, err := f.MonClientV1.Prometheuses(ns).Create(p)
|
||||
if err != nil {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue