# Mirror of https://github.com/monitoring-mixins/website.git
# Synced 2024-12-14 11:37:31 +00:00
# Original file: 110 lines, 5.2 KiB, YAML
groups:
# Alerts on CoreDNS availability, request latency and SERVFAIL ratio.
# Every expression selects CoreDNS series with job="kube-dns".
- name: coredns
  rules:
  # Fires when no `up` series for the job has the value 1 for 15 minutes.
  - alert: CoreDNSDown
    annotations:
      description: CoreDNS has disappeared from Prometheus target discovery.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsdown
      summary: CoreDNS has disappeared from Prometheus target discovery.
    expr: |
      absent(up{job="kube-dns"} == 1)
    for: 15m
    labels:
      severity: critical
  # Fires when the 99th percentile of the request-duration histogram,
  # aggregated across `instance` and `pod`, stays above 4 for 10 minutes.
  # The description reads the `server` and `zone` labels, which the
  # aggregation does not drop.
  - alert: CoreDNSLatencyHigh
    annotations:
      description: >-
        CoreDNS has 99th percentile latency of {{ $value }} seconds for
        server {{ $labels.server }} zone {{ $labels.zone }} .
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednslatencyhigh
      summary: CoreDNS is experiencing high 99th percentile latency.
    expr: |
      histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job="kube-dns"}[5m])) without (instance,pod)) > 4
    for: 10m
    labels:
      severity: critical
  # Critical tier: the SERVFAIL share of all responses stays above 0.03
  # for 10 minutes. The same alert name is reused below with a lower
  # threshold and severity "warning".
  - alert: CoreDNSErrorsHigh
    annotations:
      description: >-
        CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
        of requests.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
      summary: CoreDNS is returning SERVFAIL.
    expr: |
      sum without (pod, instance, server, zone, view, rcode, plugin) (rate(coredns_dns_responses_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
      /
      sum without (pod, instance, server, zone, view, rcode, plugin) (rate(coredns_dns_responses_total{job="kube-dns"}[5m])) > 0.03
    for: 10m
    labels:
      severity: critical
  # Warning tier: same ratio as the critical rule above, threshold 0.01.
  - alert: CoreDNSErrorsHigh
    annotations:
      description: >-
        CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
        of requests.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
      summary: CoreDNS is returning SERVFAIL.
    expr: |
      sum without (pod, instance, server, zone, view, rcode, plugin) (rate(coredns_dns_responses_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
      /
      sum without (pod, instance, server, zone, view, rcode, plugin) (rate(coredns_dns_responses_total{job="kube-dns"}[5m])) > 0.01
    for: 10m
    labels:
      severity: warning
# Alerts on the CoreDNS forward metrics: upstream latency, upstream
# SERVFAIL ratio and upstream health-check failures.
# Every expression selects CoreDNS series with job="kube-dns".
- name: coredns_forward
  rules:
  # Fires when the 99th percentile of the forward request-duration
  # histogram, aggregated across `pod`, `instance` and `rcode`, stays
  # above 4 for 10 minutes. The description reads the `to` label, which
  # the aggregation does not drop.
  - alert: CoreDNSForwardLatencyHigh
    annotations:
      description: >-
        CoreDNS has 99th percentile latency of {{ $value }} seconds
        forwarding requests to {{ $labels.to }}.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardlatencyhigh
      summary: CoreDNS is experiencing high latency forwarding requests.
    expr: |
      histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{job="kube-dns"}[5m])) without (pod, instance, rcode)) > 4
    for: 10m
    labels:
      severity: critical
  # Critical tier: the SERVFAIL share of forward responses stays above
  # 0.03 for 10 minutes. The same alert name is reused below with a lower
  # threshold and severity "warning".
  - alert: CoreDNSForwardErrorsHigh
    annotations:
      description: >-
        CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
        of forward requests to {{ $labels.to }}.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
      summary: CoreDNS is returning SERVFAIL for forward requests.
    expr: |
      sum without (pod, instance, rcode) (rate(coredns_forward_responses_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
      /
      sum without (pod, instance, rcode) (rate(coredns_forward_responses_total{job="kube-dns"}[5m])) > 0.03
    for: 10m
    labels:
      severity: critical
  # Warning tier: same ratio as the critical rule above, threshold 0.01.
  - alert: CoreDNSForwardErrorsHigh
    annotations:
      description: >-
        CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
        of forward requests to {{ $labels.to }}.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
      summary: CoreDNS is returning SERVFAIL for forward requests.
    expr: |
      sum without (pod, instance, rcode) (rate(coredns_forward_responses_total{job="kube-dns"}[5m])) > 0.01
    for: 10m
    labels:
      severity: warning
  # Fires when the health-check failure counter, aggregated across `pod`
  # and `instance`, has a non-zero rate for 10 minutes.
  - alert: CoreDNSForwardHealthcheckFailureCount
    annotations:
      description: >-
        CoreDNS health checks have failed to upstream server
        {{ $labels.to }}.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardhealthcheckfailurecount
      summary: CoreDNS health checks have failed to upstream server.
    expr: |
      sum without (pod, instance) (rate(coredns_forward_healthcheck_failures_total{job="kube-dns"}[5m])) > 0
    for: 10m
    labels:
      severity: warning
  # Fires when the "healthcheck broken" counter, aggregated across `pod`
  # and `instance`, has a non-zero rate for 10 minutes.
  - alert: CoreDNSForwardHealthcheckBrokenCount
    annotations:
      description: CoreDNS health checks have failed for all upstream servers.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardhealthcheckbrokencount
      summary: CoreDNS health checks have failed for all upstream servers.
    expr: |
      sum without (pod, instance) (rate(coredns_forward_healthcheck_broken_total{job="kube-dns"}[5m])) > 0
    for: 10m
    labels:
      severity: warning