mirror of
https://github.com/monitoring-mixins/website.git
synced 2024-12-14 11:37:31 +00:00
assets,site/content: daily assets regeneration
This commit is contained in:
parent
3e4f531005
commit
5ca8d2b978
2 changed files with 24 additions and 14 deletions
|
@ -169,12 +169,13 @@ groups:
|
|||
severity: warning
|
||||
- alert: KubeContainerWaiting
|
||||
annotations:
|
||||
description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
|
||||
{{ $labels.container}} has been in waiting state for longer than 1 hour.
|
||||
description: 'pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on
|
||||
container {{ $labels.container}} has been in waiting state for longer than
|
||||
1 hour. (reason: "{{ $labels.reason }}").'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
|
||||
summary: Pod container waiting longer than 1 hour
|
||||
expr: |
|
||||
sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
|
||||
kube_pod_container_status_waiting_reason{reason!="CrashLoopBackOff", job="kube-state-metrics"} > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -365,9 +366,9 @@ groups:
|
|||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
|
||||
summary: Processes experience elevated CPU throttling.
|
||||
expr: |
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace)
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", job="cadvisor", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)
|
||||
/
|
||||
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)
|
||||
sum(increase(container_cpu_cfs_periods_total{job="cadvisor", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)
|
||||
> ( 25 / 100 )
|
||||
for: 15m
|
||||
labels:
|
||||
|
@ -573,7 +574,9 @@ groups:
|
|||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
summary: Client certificate is about to expire.
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800
|
||||
histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800
|
||||
and
|
||||
on(job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -584,7 +587,9 @@ groups:
|
|||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
summary: Client certificate is about to expire.
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400
|
||||
histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400
|
||||
and
|
||||
on(job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
|
|
|
@ -244,12 +244,13 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
|
|||
{{< code lang="yaml" >}}
|
||||
alert: KubeContainerWaiting
|
||||
annotations:
|
||||
description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
|
||||
{{ $labels.container}} has been in waiting state for longer than 1 hour.
|
||||
description: 'pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
|
||||
{{ $labels.container}} has been in waiting state for longer than 1 hour. (reason:
|
||||
"{{ $labels.reason }}").'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
|
||||
summary: Pod container waiting longer than 1 hour
|
||||
expr: |
|
||||
sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
|
||||
kube_pod_container_status_waiting_reason{reason!="CrashLoopBackOff", job="kube-state-metrics"} > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -525,9 +526,9 @@ annotations:
|
|||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
|
||||
summary: Processes experience elevated CPU throttling.
|
||||
expr: |
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace)
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", job="cadvisor", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)
|
||||
/
|
||||
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)
|
||||
sum(increase(container_cpu_cfs_periods_total{job="cadvisor", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)
|
||||
> ( 25 / 100 )
|
||||
for: 15m
|
||||
labels:
|
||||
|
@ -805,7 +806,9 @@ annotations:
|
|||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
summary: Client certificate is about to expire.
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800
|
||||
histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800
|
||||
and
|
||||
on(job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -822,7 +825,9 @@ annotations:
|
|||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
summary: Client certificate is about to expire.
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(cluster, job) histogram_quantile(0.01, sum by (cluster, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400
|
||||
histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400
|
||||
and
|
||||
on(job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
|
|
Loading…
Reference in a new issue