1
0
Fork 0
mirror of https://github.com/monitoring-mixins/website.git synced 2024-12-14 11:37:31 +00:00
monitoring-mixins-website/assets/gitlab/alerts.yaml
Vitaly Zhuravlev b3b400137a Add jsonnet-libs mixins
Add blackbox exporter
Add mysql exporter
2024-05-04 12:01:41 +00:00

48 lines
2.1 KiB
YAML

# Alerting rules for GitLab, laid out as a Prometheus-style rule group.
# Each rule computes a failure percentage over a 5m rate window, compares it
# against a 10% threshold, and must hold for 5m before firing.
groups:
  - name: GitLabAlerts
    rules:
      # Share of failed job-registration attempts (failed / total) above 10%.
      - alert: GitLabHighJobRegistrationFailures
        annotations:
          description: >-
            {{ printf "%.2f" $value }}% of job registrations have failed on
            {{$labels.instance}}, which is above threshold of 10%.
          summary: Large percentage of failed attempts to register a job.
        # Kept double-quoted: the value carries a space before the embedded
        # newline, which a block scalar could only hold as trailing whitespace.
        expr: "100 * rate(job_register_attempts_failed_total{}[5m]) / rate(job_register_attempts_total{}[5m])
          \n> 10\n"
        for: 5m
        labels:
          severity: warning

      # Per-instance share of runner authentication failures,
      # failure / (success + failure), above 10%.
      - alert: GitLabHighRunnerAuthFailure
        annotations:
          description: >-
            {{ printf "%.2f" $value }}% of GitLab runner authentication attempts
            are failing on {{$labels.instance}}, which is above the threshold of 10%.
          summary: Large percentage of runner authentication failures.
        # Kept double-quoted for the same reason as above; "\ " is an escaped
        # space that survives the line fold.
        expr: "100 * sum by (instance) (rate(gitlab_ci_runner_authentication_failure_total{}[5m]))
          \ / \n(sum by (instance) (rate(gitlab_ci_runner_authentication_success_total{}[5m]))
          \ + sum by (instance) (rate(gitlab_ci_runner_authentication_failure_total{}[5m])))\n>
          10\n"
        for: 5m
        labels:
          severity: warning

      # Per-instance share of requests whose status label starts with 5,
      # above 10%. The only rule in this group with critical severity.
      # NOTE(review): Prometheus regex matchers are fully anchored, so the
      # leading "^" in the status matcher looks redundant - confirm before
      # simplifying.
      - alert: GitLabHigh5xxResponses
        annotations:
          description: >-
            {{ printf "%.2f" $value }}% of all requests returned 5XX HTTP
            responses, which is above the threshold 10%, indicating a system issue on
            {{$labels.instance}}.
          summary: Large rate of HTTP 5XX errors.
        expr: "100 * sum by (instance) (rate(http_requests_total{status=~\"^5.*\"}[5m]))
          / sum by (instance) (rate(http_requests_total{}[5m])) \n> 10\n"
        for: 5m
        labels:
          severity: critical

      # Per-instance share of requests whose status label starts with 4,
      # above 10%.
      - alert: GitLabHigh4xxResponses
        annotations:
          description: >-
            {{ printf "%.2f" $value }}% of all requests returned 4XX HTTP
            responses, which is above the threshold 10%, indicating many failed requests
            on {{$labels.instance}}.
          summary: Large rate of HTTP 4XX errors.
        expr: |
          100 * sum by (instance) (rate(http_requests_total{status=~"^4.*"}[5m])) / sum by (instance) (rate(http_requests_total{}[5m]))
          > 10
        for: 5m
        labels:
          severity: warning