# Mirror of https://github.com/monitoring-mixins/website.git
# Synced 2024-12-15 17:50:48 +00:00
# Upstream file listing: 81 lines, 3 KiB, YAML
# Prometheus alerting rules for CockroachDB.
# Every expression selects series with job="cockroachdb-public".
groups:
  - name: cockroachdb
    rules:
      # More than 5 resets of the uptime series inside a 10m window,
      # sustained for 1m.
      - alert: CockroachInstanceFlapping
        annotations:
          description: >-
            {{ $labels.instance }} for cluster {{ $labels.cluster }} restarted
            {{ $value }} time(s) in 10m.
          summary: CockroachDB instances have restarted in the last 10 minutes.
        expr: |
          resets(cockroachdb_sys_uptime{job="cockroachdb-public"}[10m]) > 5
        for: 1m
        labels:
          severity: warning

      # The live-node count reported by an instance differs from the number
      # of scrape targets with up == 1 for the same cluster/job, for 5m.
      - alert: CockroachLivenessMismatch
        annotations:
          description: Liveness mismatch for {{ $labels.instance }}
          summary: CockroachDB has liveness mismatches.
        expr: |
          (cockroachdb_liveness_livenodes{job="cockroachdb-public"})
          !=
          ignoring(instance) group_left() (count by(cluster, job) (up{job="cockroachdb-public"} == 1))
        for: 5m
        labels:
          severity: warning

      # More than one distinct (tag, build timestamp value) combination
      # within a cluster, for 1h.
      - alert: CockroachVersionMismatch
        annotations:
          description: >-
            Cluster {{ $labels.cluster }} running {{ $value }} different
            versions
          summary: CockroachDB cluster is running different versions.
        expr: |
          count by(cluster) (count_values by(tag, cluster) ("version", cockroachdb_build_timestamp{job="cockroachdb-public"})) > 1
        for: 1h
        labels:
          severity: warning

      # Available-capacity ratio below 0.15 for 30m.
      # The leading ':' is part of the series name. NOTE(review): that series
      # is not defined in this file - presumably a recording rule; confirm it
      # is loaded alongside these alerts.
      - alert: CockroachStoreDiskLow
        annotations:
          description: >-
            Store {{ $labels.store }} on node {{ $labels.instance }} at
            {{ $value }} available disk fraction
          summary: CockroachDB is at low disk capacity.
        expr: |
          :cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.15
        for: 30m
        labels:
          severity: critical

      # Cluster-level available-capacity ratio below 0.2 for 30m.
      # NOTE(review): the referenced series is not defined in this file -
      # presumably a recording rule; confirm it is loaded.
      - alert: CockroachClusterDiskLow
        annotations:
          description: >-
            Cluster {{ $labels.cluster }} at {{ $value }} available disk
            fraction
          summary: CockroachDB cluster is at critically low disk capacity.
        expr: |
          cluster:cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.2
        for: 30m
        labels:
          severity: critical

      # Sum of unavailable ranges per (instance, cluster) above zero for 10m.
      - alert: CockroachUnavailableRanges
        annotations:
          description: >-
            Instance {{ $labels.instance }} has {{ $value }} unavailable ranges
          summary: CockroachDB has unavailable ranges.
        expr: |
          (sum by(instance, cluster) (cockroachdb_ranges_unavailable{job="cockroachdb-public"})) > 0
        for: 10m
        labels:
          severity: critical

      # Sum of cockroachdb_replicas_leaders_not_leaseholders per
      # (instance, cluster) above zero for 10m.
      # NOTE(review): the annotation text says "ranges without leases" while
      # the series name refers to leaders that are not leaseholders - confirm
      # the wording matches the intended signal.
      - alert: CockroachNoLeaseRanges
        annotations:
          description: >-
            Instance {{ $labels.instance }} has {{ $value }} ranges without
            leases
          summary: CockroachDB has ranges without leases.
        expr: |
          (sum by(instance, cluster) (cockroachdb_replicas_leaders_not_leaseholders{job="cockroachdb-public"})) > 0
        for: 10m
        labels:
          severity: warning

      # Open file descriptors divided by the soft limit above 0.8 for 10m.
      - alert: CockroachHighOpenFDCount
        annotations:
          description: >-
            Too many open file descriptors on {{ $labels.instance }}:
            {{ $value }} fraction used
          summary: CockroachDB has too many open file descriptors.
        expr: |
          cockroachdb_sys_fd_open{job="cockroachdb-public"} / cockroachdb_sys_fd_softlimit{job="cockroachdb-public"} > 0.8
        for: 10m
        labels:
          severity: warning