1
0
Fork 0
mirror of https://github.com/monitoring-mixins/website.git synced 2024-12-14 11:37:31 +00:00
monitoring-mixins-website/assets/influxdb/alerts.yaml
Vitaly Zhuravlev b3b400137a Add jsonnet-libs mixins
Add blackbox exporter
Add mysql exporter
2024-05-04 12:01:41 +00:00

71 lines
3.5 KiB
YAML

# Alerting rule group "influxdb": task scheduler failure rate, busy workers,
# heap memory usage, and API / InfluxQL latency alerts.
groups:
- name: influxdb
rules:
# Warning: at least 25 percent of task scheduler executions failed over the
# last 5 minutes, and the condition has held for 5 minutes.
- alert: InfluxDBWarningTaskSchedulerHighFailureRate
  annotations:
    description: >-
      Task scheduler task executions for instance {{$labels.instance}}
      on cluster {{$labels.influxdb_cluster}} are failing at a rate of
      {{ printf "%.0f" $value }} percent, which is above the threshold of
      25 percent.
    summary: Automated data processing tasks are failing at a high rate.
  # increase() is used instead of rate(): rate() is per-second, so clamping
  # the denominator to 1 treated every instance running fewer than one task
  # per second as running exactly one and understated the failure percentage.
  # With increase(), clamp_min(..., 1) means "at least one execution in the
  # window" and still guards against dividing by zero.
  expr: |
    100 * increase(task_scheduler_total_execute_failure[5m])
      / clamp_min(increase(task_scheduler_total_execution_calls[5m]), 1)
    >= 25
  for: 5m
  labels:
    severity: warning
# Critical: at least 50 percent of task scheduler executions failed over the
# last 5 minutes, and the condition has held for 5 minutes.
- alert: InfluxDBCriticalTaskSchedulerHighFailureRate
  annotations:
    description: >-
      Task scheduler task executions for instance {{$labels.instance}}
      on cluster {{$labels.influxdb_cluster}} are failing at a rate of
      {{ printf "%.0f" $value }} percent, which is above the threshold of
      50 percent.
    summary: Automated data processing tasks are failing at a critical rate.
  # increase() is used instead of rate(): rate() is per-second, so clamping
  # the denominator to 1 treated every instance running fewer than one task
  # per second as running exactly one and understated the failure percentage.
  # With increase(), clamp_min(..., 1) means "at least one execution in the
  # window" and still guards against dividing by zero.
  expr: |
    100 * increase(task_scheduler_total_execute_failure[5m])
      / clamp_min(increase(task_scheduler_total_execution_calls[5m]), 1)
    >= 50
  for: 5m
  labels:
    severity: critical
# Critical: task_executor_workers_busy has been at or above 80 for 5 minutes.
- alert: InfluxDBHighBusyWorkerPercentage
  expr: |
    task_executor_workers_busy >= 80
  for: 5m
  labels:
    severity: critical
  annotations:
    summary: There is a high percentage of busy workers.
    description: >-
      The busy worker percentage for instance {{$labels.instance}}
      on cluster {{$labels.influxdb_cluster}} is
      {{ printf "%.0f" $value }} percent,
      which is above the threshold of 80 percent.
# Critical: allocated heap bytes have been at or above 80 percent of
# (idle heap bytes + allocated heap bytes) for 5 minutes.
- alert: InfluxDBHighHeapMemoryUsage
  expr: |
    100 * go_memstats_heap_alloc_bytes/clamp_min((go_memstats_heap_idle_bytes + go_memstats_heap_alloc_bytes), 1) >= 80
  for: 5m
  labels:
    severity: critical
  annotations:
    summary: There is a high amount of heap memory being used.
    description: >-
      The heap memory usage for instance {{$labels.instance}}
      on cluster {{$labels.influxdb_cluster}} is
      {{ printf "%.0f" $value }} percent,
      which is above the threshold of 80 percent.
# Critical: the average API request duration over the last 5 minutes,
# aggregated across handler, method, path, response_code, status and
# user_agent, has been at or above 0.3 seconds for 1 minute.
- alert: InfluxDBHighAverageAPIRequestLatency
  annotations:
    description: >-
      The average API request latency for instance {{$labels.instance}}
      on cluster {{$labels.influxdb_cluster}} is
      {{ printf "%.2f" $value }} seconds, which is above the threshold of
      0.3 seconds.
    summary: >-
      Average API request latency is too high. High latency will negatively
      affect system performance, degrading data availability and precision.
  # Total duration and total request count are aggregated separately and then
  # divided. Dividing per series first and summing the quotients would add up
  # one average per handler/method/path/... combination, which grows with the
  # number of combinations instead of yielding the overall average.
  # clamp_min(..., 1) guards against dividing by zero when there was no
  # traffic in the window.
  expr: |
    sum without(handler, method, path, response_code, status, user_agent) (
      increase(http_api_request_duration_seconds_sum[5m])
    )
    /
    clamp_min(
      sum without(handler, method, path, response_code, status, user_agent) (
        increase(http_api_requests_total[5m])
      ),
      1
    )
    >= 0.3
  for: 1m
  labels:
    severity: critical
# Warning: the average InfluxQL execution duration over the last 5 minutes,
# aggregated across the `result` label, has been at or above 0.1 seconds for
# 5 minutes.
- alert: InfluxDBSlowAverageIQLExecutionTime
  annotations:
    description: >-
      The average InfluxQL query execution time for instance
      {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} is
      {{ printf "%.2f" $value }} seconds, which is above the threshold of
      0.1 seconds.
    summary: >-
      InfluxQL execution times are too slow. Slow query execution times will
      negatively affect system performance, degrading data availability and
      precision.
  # Total duration and total request count are aggregated separately and then
  # divided. Dividing per series first and summing the quotients would add up
  # one average per `result` value instead of yielding the overall average.
  # clamp_min(..., 1) guards against dividing by zero when no queries ran in
  # the window.
  expr: |
    sum without(result) (
      increase(influxql_service_executing_duration_seconds_sum[5m])
    )
    /
    clamp_min(
      sum without(result) (
        increase(influxql_service_requests_total[5m])
      ),
      1
    )
    >= 0.1
  for: 5m
  labels:
    severity: warning