mirror of
https://github.com/monitoring-mixins/website.git
synced 2024-12-14 11:37:31 +00:00
b3b400137a
Add blackbox exporter Add mysql exporter
71 lines
3.5 KiB
YAML
71 lines
3.5 KiB
YAML
# Prometheus alerting rules for InfluxDB.
#
# Every rule templates {{$labels.instance}} and {{$labels.influxdb_cluster}}
# into its description, so the alerting series must carry both labels.
# Ratio rules guard the denominator with clamp_min(..., 1) so that an idle
# window evaluates to 0 instead of dividing by zero.
groups:
  - name: influxdb
    rules:
      # Warning tier: at least 25% of task executions in the last 5m failed.
      # increase() (a count over the window) is used rather than rate() so
      # that clamp_min(..., 1) only guards the empty window; with a per-second
      # rate it would clamp every instance running fewer than one task per
      # second and understate the failure percentage.
      - alert: InfluxDBWarningTaskSchedulerHighFailureRate
        annotations:
          description: >-
            Task scheduler task executions for instance {{$labels.instance}}
            on cluster {{$labels.influxdb_cluster}} are failing at a rate of
            {{ printf "%.0f" $value }} percent, which is above the threshold
            of 25 percent.
          summary: Automated data processing tasks are failing at a high rate.
        expr: |
          100 * increase(task_scheduler_total_execute_failure[5m])
            / clamp_min(increase(task_scheduler_total_execution_calls[5m]), 1)
            >= 25
        for: 5m
        labels:
          severity: warning

      # Critical tier of the rule above: same expression, 50% threshold.
      - alert: InfluxDBCriticalTaskSchedulerHighFailureRate
        annotations:
          description: >-
            Task scheduler task executions for instance {{$labels.instance}}
            on cluster {{$labels.influxdb_cluster}} are failing at a rate of
            {{ printf "%.0f" $value }} percent, which is above the threshold
            of 50 percent.
          summary: Automated data processing tasks are failing at a critical rate.
        expr: |
          100 * increase(task_scheduler_total_execute_failure[5m])
            / clamp_min(increase(task_scheduler_total_execution_calls[5m]), 1)
            >= 50
        for: 5m
        labels:
          severity: critical

      # The gauge is compared to 80 directly and reported as "percent".
      # NOTE(review): assumes task_executor_workers_busy is already exported
      # as a percentage (0-100) - confirm against the exporter.
      - alert: InfluxDBHighBusyWorkerPercentage
        annotations:
          description: >-
            The busy worker percentage for instance {{$labels.instance}} on
            cluster {{$labels.influxdb_cluster}} is {{ printf "%.0f" $value }}
            percent, which is above the threshold of 80 percent.
          summary: There is a high percentage of busy workers.
        expr: |
          task_executor_workers_busy >= 80
        for: 5m
        labels:
          severity: critical

      # Allocated heap as a percentage of (idle heap + allocated heap).
      - alert: InfluxDBHighHeapMemoryUsage
        annotations:
          description: >-
            The heap memory usage for instance {{$labels.instance}} on cluster
            {{$labels.influxdb_cluster}} is {{ printf "%.0f" $value }} percent,
            which is above the threshold of 80 percent.
          summary: There is a high amount of heap memory being used.
        expr: |
          100 * go_memstats_heap_alloc_bytes
            / clamp_min((go_memstats_heap_idle_bytes + go_memstats_heap_alloc_bytes), 1)
            >= 80
        for: 5m
        labels:
          severity: critical

      # Average seconds per API request over 5m. Total duration and total
      # request count are each summed across the per-request labels BEFORE
      # dividing; dividing per series and summing the quotients would add up
      # one average per handler/path/... series and inflate the result.
      - alert: InfluxDBHighAverageAPIRequestLatency
        annotations:
          description: >-
            The average API request latency for instance {{$labels.instance}}
            on cluster {{$labels.influxdb_cluster}} is
            {{ printf "%.2f" $value }} seconds, which is above the threshold
            of 0.3 seconds.
          summary: >-
            Average API request latency is too high. High latency will
            negatively affect system performance, degrading data availability
            and precision.
        expr: |
          sum without(handler, method, path, response_code, status, user_agent) (
            increase(http_api_request_duration_seconds_sum[5m])
          )
          / clamp_min(
            sum without(handler, method, path, response_code, status, user_agent) (
              increase(http_api_requests_total[5m])
            ),
            1
          )
          >= 0.3
        for: 1m
        labels:
          severity: critical

      # Average seconds per InfluxQL query over 5m, aggregated across the
      # `result` label before dividing (same reasoning as the API latency
      # rule above).
      - alert: InfluxDBSlowAverageIQLExecutionTime
        annotations:
          description: >-
            The average InfluxQL query execution time for instance
            {{$labels.instance}} on cluster {{$labels.influxdb_cluster}} is
            {{ printf "%.2f" $value }} seconds, which is above the threshold
            of 0.1 seconds.
          summary: >-
            InfluxQL execution times are too slow. Slow query execution times
            will negatively affect system performance, degrading data
            availability and precision.
        expr: |
          sum without(result) (increase(influxql_service_executing_duration_seconds_sum[5m]))
            / clamp_min(sum without(result) (increase(influxql_service_requests_total[5m])), 1)
            >= 0.1
        for: 5m
        labels:
          severity: warning