# Mirror of https://github.com/monitoring-mixins/website.git
# Synced 2024-12-14 11:37:31 +00:00, commit b3b400137a
# ("Add blackbox exporter Add mysql exporter").
# 114 lines, 4.8 KiB, YAML.
---
# Prometheus alerting rules for Apache CouchDB.
# One rule group; every rule must hold for 5 minutes (`for: 5m`) before firing.
groups:
  - name: ApacheCouchDBAlerts
    rules:
      # Fires when the minimum of couchdb_couch_replicator_cluster_is_stable
      # across a (job, couchdb_cluster) pair drops below 1.
      - alert: CouchDBUnhealthyCluster
        annotations:
          description: '{{$labels.couchdb_cluster}} has reported a value of {{ printf
            "%.0f" $value }} for its stability over the last 5 minutes, which is below
            the threshold of 1.'
          summary: At least one of the nodes in a cluster is reporting the cluster as
            being unstable.
        expr: |
          min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable) < 1
        for: 5m
        labels:
          severity: critical

      # Fires when the 5-minute increase of status codes matching "4.*",
      # summed per (job, instance), exceeds 5.
      - alert: CouchDBHigh4xxResponseCodes
        annotations:
          description: '{{ printf "%.0f" $value }} 4xx responses have been detected over
            the last 5 minutes on {{$labels.instance}}, which is above the threshold of
            5.'
          summary: There are a high number of 4xx responses for incoming requests to a
            node.
        expr: |
          sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.*"}[5m])) > 5
        for: 5m
        labels:
          severity: warning

      # Fires when the 5-minute increase of status codes matching "5.*",
      # summed per (job, instance), is above 0.
      - alert: CouchDBHigh5xxResponseCodes
        annotations:
          description: '{{ printf "%.0f" $value }} 5xx responses have been detected over
            the last 5 minutes on {{$labels.instance}}, which is above the threshold of
            0.'
          summary: There are a high number of 5xx responses for incoming requests to a
            node.
        expr: |
          sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.*"}[5m])) > 0
        for: 5m
        labels:
          severity: critical

      # Fires when request_time sum / count, summed per (job, instance),
      # exceeds 500.
      # NOTE(review): the expression has no range selector and no unit
      # conversion on a *_seconds_* metric, while the description reports the
      # value as ms "over the last 5 minutes" - confirm the unit the exporter
      # actually emits before relying on the 500 threshold.
      - alert: CouchDBModerateRequestLatency
        annotations:
          description: 'An average of {{ printf "%.0f" $value }}ms of request latency
            has occurred over the last 5 minutes on {{$labels.instance}}, which is above
            the threshold of 500ms. '
          summary: There is a moderate level of request latency for a node.
        expr: |
          sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > 500
        for: 5m
        labels:
          severity: warning

      # Same expression as CouchDBModerateRequestLatency with a threshold of
      # 1000 and critical severity.
      # NOTE(review): the same seconds-vs-ms question applies here - confirm
      # the metric's unit against the exporter.
      - alert: CouchDBHighRequestLatency
        annotations:
          description: 'An average of {{ printf "%.0f" $value }}ms of request latency
            has occurred over the last 5 minutes on {{$labels.instance}}, which is above
            the threshold of 1000ms. '
          summary: There is a high level of request latency for a node.
        expr: |
          sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > 1000
        for: 5m
        labels:
          severity: critical

      # Fires when couchdb_couch_replicator_jobs_pending, summed per
      # (job, instance), exceeds 10.
      - alert: CouchDBManyReplicatorJobsPending
        annotations:
          description: '{{ printf "%.0f" $value }} replicator jobs are pending on {{$labels.instance}},
            which is above the threshold of 10. '
          summary: There is a high number of replicator jobs pending for a node.
        expr: |
          sum by(job, instance) (couchdb_couch_replicator_jobs_pending) > 10
        for: 5m
        labels:
          severity: warning

      # Fires when the 5-minute increase of replicator job crashes, summed per
      # (job, instance), is above 0.
      - alert: CouchDBReplicatorJobsCrashing
        annotations:
          description: '{{ printf "%.0f" $value }} replicator jobs have crashed over the
            last 5 minutes on {{$labels.instance}}, which is above the threshold of 0. '
          summary: There are replicator jobs crashing for a node.
        expr: |
          sum by(job, instance) (increase(couchdb_couch_replicator_jobs_crashes_total[5m])) > 0
        for: 5m
        labels:
          severity: critical

      # Fires when the 5-minute increase of replicator changes-queue deaths,
      # summed per (job, instance), is above 0.
      - alert: CouchDBReplicatorChangesQueuesDying
        annotations:
          description: '{{ printf "%.0f" $value }} replicator changes queue processes
            have died over the last 5 minutes on {{$labels.instance}}, which is above
            the threshold of 0. '
          summary: There are replicator changes queue process deaths for a node.
        expr: |
          sum by(job, instance) (increase(couchdb_couch_replicator_changes_queue_deaths_total[5m])) > 0
        for: 5m
        labels:
          severity: warning

      # Fires when the 5-minute increase of replicator connection-owner
      # crashes, summed per (job, instance), is above 0.
      - alert: CouchDBReplicatorConnectionOwnersCrashing
        annotations:
          description: '{{ printf "%.0f" $value }} replicator connection owner processes
            have crashed over the last 5 minutes on {{$labels.instance}}, which is above
            the threshold of 0. '
          summary: There are replicator connection owner process crashes for a node.
        expr: |
          sum by(job, instance) (increase(couchdb_couch_replicator_connection_owner_crashes_total[5m])) > 0
        for: 5m
        labels:
          severity: warning

      # Fires when the 5-minute increase of replicator connection-worker
      # crashes, summed per (job, instance), is above 0.
      - alert: CouchDBReplicatorConnectionWorkersCrashing
        annotations:
          description: '{{ printf "%.0f" $value }} replicator connection worker processes
            have crashed over the last 5 minutes on {{$labels.instance}}, which is above
            the threshold of 0. '
          summary: There are replicator connection worker process crashes for a node.
        expr: |
          sum by(job, instance) (increase(couchdb_couch_replicator_connection_worker_crashes_total[5m])) > 0
        for: 5m
        labels:
          severity: warning