# Mirror of https://github.com/monitoring-mixins/website.git (synced 2024-12-15 17:50:48 +00:00; Fork 0)
# Path: monitoring-mixins-website/assets/kube-cockroachdb/alerts.yaml
# Timestamp shown by the mirror: 2023-05-18 03:20:01 +00:00
#
# Size shown by the mirror: 81 lines, 3 KiB, YAML

# Prometheus alerting rules for CockroachDB.
# Every expression selects series with job="cockroachdb-public".
groups:
- name: cockroachdb
  rules:
  # Fires when the uptime counter of an instance reset more than 5 times within
  # a 10-minute window, and that condition has held for 1 minute.
  - alert: CockroachInstanceFlapping
    annotations:
      description: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted
        {{ $value }} time(s) in 10m.'
      summary: CockroachDB instances have restarted in the last 10 minutes.
    expr: |
      resets(cockroachdb_sys_uptime{job="cockroachdb-public"}[10m]) > 5
    for: 1m
    labels:
      severity: warning
  # Compares the live-node count reported by each instance with the number of
  # scrape targets that are up for the same cluster and job; fires when the two
  # differ for 5 minutes. The instance label is ignored when matching the sides.
  - alert: CockroachLivenessMismatch
    annotations:
      description: Liveness mismatch for {{ $labels.instance }}
      summary: CockroachDB has liveness mismatches.
    expr: |
      (cockroachdb_liveness_livenodes{job="cockroachdb-public"})
        !=
      ignoring(instance) group_left() (count by(cluster, job) (up{job="cockroachdb-public"} == 1))
    for: 5m
    labels:
      severity: warning
  # Counts the distinct build-timestamp values (per tag) seen in each cluster
  # and fires when a cluster reports more than one for 1 hour.
  - alert: CockroachVersionMismatch
    annotations:
      description: Cluster {{ $labels.cluster }} running {{ $value }} different versions
      summary: CockroachDB cluster is running different versions.
    expr: |
      count by(cluster) (count_values by(tag, cluster) ("version", cockroachdb_build_timestamp{job="cockroachdb-public"})) > 1
    for: 1h
    labels:
      severity: warning
  # Fires when the available-capacity ratio stays below 0.15 for 30 minutes.
  # NOTE(review): the leading-colon metric name looks like a recording rule
  # defined outside this file - confirm it exists wherever these alerts load.
  - alert: CockroachStoreDiskLow
    annotations:
      description: 'Store {{ $labels.store }} on node {{ $labels.instance }} at
        {{ $value }} available disk fraction'
      summary: CockroachDB is at low disk capacity.
    expr: |
      :cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.15
    for: 30m
    labels:
      severity: critical
  # Fires when the cluster-level available-capacity ratio stays below 0.2 for
  # 30 minutes.
  # NOTE(review): presumably backed by a recording rule defined elsewhere -
  # confirm.
  - alert: CockroachClusterDiskLow
    annotations:
      description: Cluster {{ $labels.cluster }} at {{ $value }} available disk fraction
      summary: CockroachDB cluster is at critically low disk capacity.
    expr: |
      cluster:cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.2
    for: 30m
    labels:
      severity: critical
  # Fires when the per-instance sum of unavailable ranges is above zero for
  # 10 minutes.
  - alert: CockroachUnavailableRanges
    annotations:
      description: Instance {{ $labels.instance }} has {{ $value }} unavailable ranges
      summary: CockroachDB has unavailable ranges.
    expr: |
      (sum by(instance, cluster) (cockroachdb_ranges_unavailable{job="cockroachdb-public"})) > 0
    for: 10m
    labels:
      severity: critical
  # Fires when the per-instance sum of cockroachdb_replicas_leaders_not_leaseholders
  # is above zero for 10 minutes.
  # NOTE(review): the metric name suggests "leaders that are not leaseholders"
  # while the annotations say "ranges without leases" - confirm the wording.
  - alert: CockroachNoLeaseRanges
    annotations:
      description: Instance {{ $labels.instance }} has {{ $value }} ranges
        without leases
      summary: CockroachDB has ranges without leases.
    expr: |
      (sum by(instance, cluster) (cockroachdb_replicas_leaders_not_leaseholders{job="cockroachdb-public"})) > 0
    for: 10m
    labels:
      severity: warning
  # Fires when open file descriptors exceed 80% of the soft limit for
  # 10 minutes.
  - alert: CockroachHighOpenFDCount
    annotations:
      description: 'Too many open file descriptors on {{ $labels.instance }}:
        {{ $value }} fraction used'
      summary: CockroachDB has too many open file descriptors.
    expr: |
      cockroachdb_sys_fd_open{job="cockroachdb-public"} / cockroachdb_sys_fd_softlimit{job="cockroachdb-public"} > 0.8
    for: 10m
    labels:
      severity: warning