1
0
Fork 0
mirror of https://github.com/monitoring-mixins/website.git synced 2024-12-14 11:37:31 +00:00
monitoring-mixins-website/assets/apache-couchdb/alerts.yaml
Vitaly Zhuravlev b3b400137a Add jsonnet-libs mixins
Add blackbox exporter
Add mysql exporter
2024-05-04 12:01:41 +00:00

114 lines
4.8 KiB
YAML

# Prometheus alerting rules for Apache CouchDB.
# Every rule below carries `for: 5m`, so its expression has to stay true for
# five minutes before the alert fires.
groups:
  - name: ApacheCouchDBAlerts
    rules:
      # Cluster stability: takes the minimum of the stability metric per
      # (job, couchdb_cluster) and alerts when that minimum is below 1.
      - alert: CouchDBUnhealthyCluster
        annotations:
          description: '{{$labels.couchdb_cluster}} has reported a value of {{ printf
            "%.0f" $value }} for its stability over the last 5 minutes, which is below
            the threshold of 1.'
          summary: At least one of the nodes in a cluster is reporting the cluster as
            being unstable.
        expr: |
          min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable) < 1
        for: 5m
        labels:
          severity: critical

      # 4xx responses: 5-minute increase of status codes matching "4.*",
      # summed per (job, instance); alerts above 5.
      - alert: CouchDBHigh4xxResponseCodes
        annotations:
          description: '{{ printf "%.0f" $value }} 4xx responses have been detected over
            the last 5 minutes on {{$labels.instance}}, which is above the threshold of
            5.'
          summary: There are a high number of 4xx responses for incoming requests to a
            node.
        expr: |
          sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.*"}[5m])) > 5
        for: 5m
        labels:
          severity: warning

      # 5xx responses: same shape as the 4xx rule, but any increase above 0
      # is treated as critical.
      - alert: CouchDBHigh5xxResponseCodes
        annotations:
          description: '{{ printf "%.0f" $value }} 5xx responses have been detected over
            the last 5 minutes on {{$labels.instance}}, which is above the threshold of
            0.'
          summary: There are a high number of 5xx responses for incoming requests to a
            node.
        expr: |
          sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.*"}[5m])) > 0
        for: 5m
        labels:
          severity: critical

      # Request latency (warning tier): request-time sum divided by count,
      # summed per (job, instance); alerts above 500.
      # NOTE(review): the metric names end in `_seconds`, yet the threshold and
      # the description are expressed in ms, and the ratio has no range
      # selector although the description says "over the last 5 minutes".
      # Confirm the unit and window the exporter actually reports.
      - alert: CouchDBModerateRequestLatency
        annotations:
          description: 'An average of {{ printf "%.0f" $value }}ms of request latency
            has occurred over the last 5 minutes on {{$labels.instance}}, which is above
            the threshold of 500ms. '
          summary: There is a moderate level of request latency for a node.
        expr: |
          sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > 500
        for: 5m
        labels:
          severity: warning

      # Request latency (critical tier): same expression as the warning tier
      # with the threshold raised to 1000.
      # NOTE(review): same unit/window question as the warning tier — confirm.
      - alert: CouchDBHighRequestLatency
        annotations:
          description: 'An average of {{ printf "%.0f" $value }}ms of request latency
            has occurred over the last 5 minutes on {{$labels.instance}}, which is above
            the threshold of 1000ms. '
          summary: There is a high level of request latency for a node.
        expr: |
          sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > 1000
        for: 5m
        labels:
          severity: critical

      # Replicator backlog: pending replicator jobs summed per
      # (job, instance); alerts above 10.
      - alert: CouchDBManyReplicatorJobsPending
        annotations:
          description: '{{ printf "%.0f" $value }} replicator jobs are pending on {{$labels.instance}},
            which is above the threshold of 10. '
          summary: There is a high number of replicator jobs pending for a node.
        expr: |
          sum by(job, instance) (couchdb_couch_replicator_jobs_pending) > 10
        for: 5m
        labels:
          severity: warning

      # Replicator job crashes: any 5-minute increase of the crash counter,
      # summed per (job, instance), is critical.
      - alert: CouchDBReplicatorJobsCrashing
        annotations:
          description: '{{ printf "%.0f" $value }} replicator jobs have crashed over the
            last 5 minutes on {{$labels.instance}}, which is above the threshold of 0. '
          summary: There are replicator jobs crashing for a node.
        expr: |
          sum by(job, instance) (increase(couchdb_couch_replicator_jobs_crashes_total[5m])) > 0
        for: 5m
        labels:
          severity: critical

      # Changes-queue deaths: any 5-minute increase of the deaths counter,
      # summed per (job, instance), raises a warning.
      - alert: CouchDBReplicatorChangesQueuesDying
        annotations:
          description: '{{ printf "%.0f" $value }} replicator changes queue processes
            have died over the last 5 minutes on {{$labels.instance}}, which is above
            the threshold of 0. '
          summary: There are replicator changes queue process deaths for a node.
        expr: |
          sum by(job, instance) (increase(couchdb_couch_replicator_changes_queue_deaths_total[5m])) > 0
        for: 5m
        labels:
          severity: warning

      # Connection-owner crashes: any 5-minute increase of the crash counter,
      # summed per (job, instance), raises a warning.
      - alert: CouchDBReplicatorConnectionOwnersCrashing
        annotations:
          description: '{{ printf "%.0f" $value }} replicator connection owner processes
            have crashed over the last 5 minutes on {{$labels.instance}}, which is above
            the threshold of 0. '
          summary: There are replicator connection owner process crashes for a node.
        expr: |
          sum by(job, instance) (increase(couchdb_couch_replicator_connection_owner_crashes_total[5m])) > 0
        for: 5m
        labels:
          severity: warning

      # Connection-worker crashes: any 5-minute increase of the crash counter,
      # summed per (job, instance), raises a warning.
      - alert: CouchDBReplicatorConnectionWorkersCrashing
        annotations:
          description: '{{ printf "%.0f" $value }} replicator connection worker processes
            have crashed over the last 5 minutes on {{$labels.instance}}, which is above
            the threshold of 0. '
          summary: There are replicator connection worker process crashes for a node.
        expr: |
          sum by(job, instance) (increase(couchdb_couch_replicator_connection_worker_crashes_total[5m])) > 0
        for: 5m
        labels:
          severity: warning