From 04f72fdce55c571098001df1bf842f68768a25e3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 10 Aug 2022 03:41:49 +0000 Subject: [PATCH] assets,site/content: daily assets regeneration --- assets/thanos/alerts.yaml | 16 ++++++++++++++++ site/content/thanos/_index.md | 22 ++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/assets/thanos/alerts.yaml b/assets/thanos/alerts.yaml index 6aaf34b..be7b48d 100644 --- a/assets/thanos/alerts.yaml +++ b/assets/thanos/alerts.yaml @@ -170,6 +170,22 @@ groups: for: 10m labels: severity: critical + - alert: ThanosQueryOverload + annotations: + description: Thanos Query {{$labels.job}} has been overloaded for more than + 15 minutes. This may be a symptom of excessive simultaneous complex requests, + low performance of the Prometheus API, or failures within these components. + Assess the health of the Thanos query instances, the connected Prometheus + instances, look for potential senders of these requests and then contact support. + runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryoverload + summary: Thanos query reaches its maximum capacity serving concurrent requests. + expr: | + ( + max_over_time(thanos_query_concurrent_gate_queries_max[5m]) - avg_over_time(thanos_query_concurrent_gate_queries_in_flight[5m]) < 1 + ) + for: 15m + labels: + severity: warning - name: thanos-receive rules: - alert: ThanosReceiveHttpRequestErrorRateHigh diff --git a/site/content/thanos/_index.md b/site/content/thanos/_index.md index 865a150..c412727 100644 --- a/site/content/thanos/_index.md +++ b/site/content/thanos/_index.md @@ -258,6 +258,28 @@ labels: severity: critical {{< /code >}} +##### ThanosQueryOverload +https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryoverload + +{{< code lang="yaml" >}} +alert: ThanosQueryOverload +annotations: + description: Thanos Query {{$labels.job}} has been overloaded for more than 15 minutes.
+ This may be a symptom of excessive simultaneous complex requests, low performance + of the Prometheus API, or failures within these components. Assess the health + of the Thanos query instances, the connected Prometheus instances, look for potential + senders of these requests and then contact support. + runbook_url: https://github.com/thanos-io/thanos/tree/main/mixin/runbook.md#alert-name-thanosqueryoverload + summary: Thanos query reaches its maximum capacity serving concurrent requests. +expr: | + ( + max_over_time(thanos_query_concurrent_gate_queries_max[5m]) - avg_over_time(thanos_query_concurrent_gate_queries_in_flight[5m]) < 1 + ) +for: 15m +labels: + severity: warning +{{< /code >}} + ### thanos-receive ##### ThanosReceiveHttpRequestErrorRateHigh