1
0
Fork 0
mirror of https://github.com/monitoring-mixins/website.git synced 2024-12-14 11:37:31 +00:00
monitoring-mixins-website/assets/apache-cassandra/alerts.yaml
Vitaly Zhuravlev b3b400137a Add jsonnet-libs mixins
Add blackbox exporter
Add mysql exporter
2024-05-04 12:01:41 +00:00

91 lines
3.7 KiB
YAML

groups:
- name: ApacheCassandraAlerts
rules:
- alert: HighReadLatency
annotations:
description: 'An average of {{ printf "%.0f" $value }}ms of read latency has
occurred over the last 5 minutes on {{$labels.instance}}, which is above the
threshold of 200ms. '
summary: There is a high level of read latency within the node.
expr: |
sum(cassandra_table_readlatency_seconds_sum) by (instance) / sum(cassandra_table_readlatency_seconds_count) by (instance) * 1000 > 200
for: 5m
labels:
severity: critical
- alert: HighWriteLatency
annotations:
description: 'An average of {{ printf "%.0f" $value }}ms of write latency has
occurred over the last 5 minutes on {{$labels.instance}}, which is above the
threshold of 200ms. '
summary: There is a high level of write latency within the node.
expr: |
sum(cassandra_keyspace_writelatency_seconds_sum) by (instance) / sum(cassandra_keyspace_writelatency_seconds_count) by (instance) * 1000 > 200
for: 5m
labels:
severity: critical
- alert: HighPendingCompactionTasks
annotations:
description: '{{ printf "%.0f" $value }} compaction tasks have been pending
over the last 15 minutes on {{$labels.instance}}, which is above the threshold
of 30. '
summary: Compaction task queue is filling up.
expr: |
cassandra_compaction_pendingtasks > 30
for: 15m
labels:
severity: warning
- alert: BlockedCompactionTasksFound
annotations:
description: '{{ printf "%.0f" $value }} compaction tasks have been blocked
over the last 5 minutes on {{$labels.instance}}, which is above the threshold
of 1. '
summary: Compaction task queue is full.
expr: |
cassandra_threadpools_currentlyblockedtasks_count{threadpools="CompactionExecutor", path="internal"} > 1
for: 5m
labels:
severity: critical
- alert: HintsStoredOnNode
annotations:
description: '{{ printf "%.0f" $value }} hints have been written to the node
over the last minute on {{$labels.instance}}, which is above the threshold
of 1. '
summary: Hints have been recently written to this node.
expr: |
increase(cassandra_storage_totalhints_count[5m]) > 1
for: 1m
labels:
severity: warning
- alert: UnavailableWriteRequestsFound
annotations:
description: '{{ printf "%.0f" $value }} unavailable write requests have been
found over the last 5 minutes on {{$labels.instance}}, which is above the
threshold of 1. '
summary: Unavailable exceptions have been encountered while performing writes
in this cluster.
expr: |
sum(cassandra_clientrequest_unavailables_count{clientrequest="Write"}) by (cassandra_cluster) > 1
for: 5m
labels:
severity: critical
- alert: HighCpuUsage
annotations:
description: 'Cpu usage is at {{ printf "%.0f" $value }} percent over the last
5 minutes on {{$labels.instance}}, which is above the threshold of 80. '
summary: A node has a CPU usage higher than the configured threshold.
expr: |
jvm_process_cpu_load{job=~"integrations/apache-cassandra"} * 100 > 80
for: 5m
labels:
severity: critical
- alert: HighMemoryUsage
annotations:
description: 'Memory usage is at {{ printf "%.0f" $value }} percent over the
last 5 minutes on {{$labels.instance}}, which is above the threshold of 80
}}. '
summary: A node has a higher memory utilization than the configured threshold.
expr: |
sum(jvm_memory_usage_used_bytes{job=~"integrations/apache-cassandra", area="Heap"}) / sum(jvm_physical_memory_size{job=~"integrations/apache-cassandra"}) * 100 > 80
for: 5m
labels:
severity: critical