1
0
Fork 0
mirror of https://github.com/monitoring-mixins/website.git synced 2024-12-14 11:37:31 +00:00
monitoring-mixins-website/assets/coredns/alerts.yaml
2023-04-26 03:20:51 +00:00

110 lines
5.2 KiB
YAML

# Prometheus alerting rules for CoreDNS; every expression selects job="kube-dns".
groups:
# Availability, request-latency and SERVFAIL-ratio alerts for the DNS server.
- name: coredns
  rules:
  # Fires when no `up{job="kube-dns"}` series has the value 1 for 15 minutes.
  - alert: CoreDNSDown
    annotations:
      description: CoreDNS has disappeared from Prometheus target discovery.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsdown
      summary: CoreDNS has disappeared from Prometheus target discovery.
    expr: |
      absent(up{job="kube-dns"} == 1)
    for: 15m
    labels:
      severity: critical
  # Fires when the 99th percentile request duration, aggregated across
  # instance and pod, stays above 4 seconds for 10 minutes.
  - alert: CoreDNSLatencyHigh
    annotations:
      description: >-
        CoreDNS has 99th percentile latency of {{ $value }} seconds for
        server {{ $labels.server }} zone {{ $labels.zone }} .
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednslatencyhigh
      summary: CoreDNS is experiencing high 99th percentile latency.
    expr: |
      histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job="kube-dns"}[5m])) without (instance,pod)) > 4
    for: 10m
    labels:
      severity: critical
  # Critical tier: SERVFAIL responses exceed 3% of all responses for 10 minutes.
  # The rcode label is dropped on both sides so numerator and denominator match.
  - alert: CoreDNSErrorsHigh
    annotations:
      description: >-
        CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage
        }} of requests.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
      summary: CoreDNS is returning SERVFAIL.
    expr: |
      sum without (pod, instance, server, zone, view, rcode, plugin) (rate(coredns_dns_responses_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
        /
      sum without (pod, instance, server, zone, view, rcode, plugin) (rate(coredns_dns_responses_total{job="kube-dns"}[5m])) > 0.03
    for: 10m
    labels:
      severity: critical
  # Warning tier of the same alert: identical ratio with a 1% threshold.
  - alert: CoreDNSErrorsHigh
    annotations:
      description: >-
        CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage
        }} of requests.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
      summary: CoreDNS is returning SERVFAIL.
    expr: |
      sum without (pod, instance, server, zone, view, rcode, plugin) (rate(coredns_dns_responses_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
        /
      sum without (pod, instance, server, zone, view, rcode, plugin) (rate(coredns_dns_responses_total{job="kube-dns"}[5m])) > 0.01
    for: 10m
    labels:
      severity: warning
# Alerts built on the coredns_forward_* metrics: forwarding latency, SERVFAIL
# ratio and upstream health-check failures.
- name: coredns_forward
  rules:
  # Fires when the 99th percentile forward request duration, aggregated across
  # pod, instance and rcode, stays above 4 seconds for 10 minutes.
  - alert: CoreDNSForwardLatencyHigh
    annotations:
      description: >-
        CoreDNS has 99th percentile latency of {{ $value }} seconds forwarding
        requests to {{ $labels.to }}.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardlatencyhigh
      summary: CoreDNS is experiencing high latency forwarding requests.
    expr: |
      histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{job="kube-dns"}[5m])) without (pod, instance, rcode)) > 4
    for: 10m
    labels:
      severity: critical
  # Critical tier: SERVFAIL forward responses exceed 3% of all forward
  # responses for 10 minutes.
  - alert: CoreDNSForwardErrorsHigh
    annotations:
      description: >-
        CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage
        }} of forward requests to {{ $labels.to }}.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
      summary: CoreDNS is returning SERVFAIL for forward requests.
    expr: |
      sum without (pod, instance, rcode) (rate(coredns_forward_responses_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
        /
      sum without (pod, instance, rcode) (rate(coredns_forward_responses_total{job="kube-dns"}[5m])) > 0.03
    for: 10m
    labels:
      severity: critical
  # Warning tier of the same alert: identical ratio with a 1% threshold.
  - alert: CoreDNSForwardErrorsHigh
    annotations:
      description: >-
        CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage
        }} of forward requests to {{ $labels.to }}.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
      summary: CoreDNS is returning SERVFAIL for forward requests.
    expr: |
      sum without (pod, instance, rcode) (rate(coredns_forward_responses_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
        /
      sum without (pod, instance, rcode) (rate(coredns_forward_responses_total{job="kube-dns"}[5m])) > 0.01
    for: 10m
    labels:
      severity: warning
  # Fires when the rate of coredns_forward_healthcheck_failures_total is
  # non-zero for 10 minutes.
  - alert: CoreDNSForwardHealthcheckFailureCount
    annotations:
      description: >-
        CoreDNS health checks have failed to upstream server {{ $labels.to
        }}.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardhealthcheckfailurecount
      summary: CoreDNS health checks have failed to upstream server.
    expr: |
      sum without (pod, instance) (rate(coredns_forward_healthcheck_failures_total{job="kube-dns"}[5m])) > 0
    for: 10m
    labels:
      severity: warning
  # Fires when the rate of coredns_forward_healthcheck_broken_total is
  # non-zero for 10 minutes.
  - alert: CoreDNSForwardHealthcheckBrokenCount
    annotations:
      description: CoreDNS health checks have failed for all upstream servers.
      runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardhealthcheckbrokencount
      summary: CoreDNS health checks have failed for all upstream servers.
    expr: |
      sum without (pod, instance) (rate(coredns_forward_healthcheck_broken_total{job="kube-dns"}[5m])) > 0
    for: 10m
    labels:
      severity: warning