Mirror of https://github.com/monitoring-mixins/website.git (synced 2024-12-14 11:37:31 +00:00)
assets,site/content: daily assets regeneration
parent efa80a72eb
commit 9105d0ab1d
6 changed files with 62 additions and 62 deletions
@@ -46,22 +46,6 @@ groups:
     for: 15m
     labels:
       severity: warning
-  - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
-    annotations:
-      description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
-        from Prometheus {{$labels.instance}} to any Alertmanager.'
-      summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
-    expr: |
-      min without(alertmanager) (
-        rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
-      /
-        rate(prometheus_notifications_sent_total{job="prometheus"}[5m])
-      )
-      * 100
-      > 3
-    for: 15m
-    labels:
-      severity: critical
   - alert: PrometheusNotConnectedToAlertmanagers
     annotations:
       description: Prometheus {{$labels.instance}} is not connected to any Alertmanagers.
@@ -217,3 +201,19 @@ groups:
     for: 15m
     labels:
       severity: warning
+  - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
+    annotations:
+      description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
+        from Prometheus {{$labels.instance}} to any Alertmanager.'
+      summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
+    expr: |
+      min without (alertmanager) (
+        rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
+      /
+        rate(prometheus_notifications_sent_total{job="prometheus"}[5m])
+      )
+      * 100
+      > 3
+    for: 15m
+    labels:
+      severity: critical
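Apart from being re-emitted at a different position in the file and gaining a space in `min without (alertmanager)`, the re-added alert above matches the block removed in the previous hunk. As a reading aid, here is a minimal PromQL sketch of the rule's inner ratio, written as an ad-hoc query derived from the rule (it is not part of the generated assets): it returns the per-Alertmanager notification error percentage, which `min without (alertmanager)` then collapses to the best-performing Alertmanager, so the alert only fires when even that one stays above 3% for 15 minutes.

    # Per-Alertmanager notification error percentage over the last 5 minutes
    # (the rule's expression before `min without (alertmanager)` and the > 3 threshold).
    (
      rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
    /
      rate(prometheus_notifications_sent_total{job="prometheus"}[5m])
    ) * 100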
@@ -14,7 +14,7 @@ groups:
     annotations:
       description: Thanos Compact {{$labels.job}} has failed to run and now is halted.
       summary: Thanos Compact has failed to run ans is now halted.
-    expr: thanos_compactor_halted{job=~"thanos-compact.*"} == 1
+    expr: thanos_compact_halted{job=~"thanos-compact.*"} == 1
     for: 5m
     labels:
       severity: warning
@@ -123,9 +123,9 @@ groups:
       summary: Thanos Query is having high number of DNS failures.
     expr: |
       (
-        sum by (job) (rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
+        sum by (job) (rate(thanos_query_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
       /
-        sum by (job) (rate(thanos_querier_store_apis_dns_lookups_total{job=~"thanos-query.*"}[5m]))
+        sum by (job) (rate(thanos_query_store_apis_dns_lookups_total{job=~"thanos-query.*"}[5m]))
       ) * 100 > 1
     for: 15m
     labels:
@@ -436,9 +436,9 @@ groups:
       summary: Thanos Rule is having high number of DNS failures.
     expr: |
       (
-        sum by (job) (rate(thanos_ruler_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m]))
+        sum by (job) (rate(thanos_rule_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m]))
       /
-        sum by (job) (rate(thanos_ruler_query_apis_dns_lookups_total{job=~"thanos-rule.*"}[5m]))
+        sum by (job) (rate(thanos_rule_query_apis_dns_lookups_total{job=~"thanos-rule.*"}[5m]))
       * 100 > 1
       )
     for: 15m
@@ -451,9 +451,9 @@ groups:
       summary: Thanos Rule is having high number of DNS failures.
     expr: |
       (
-        sum by (job) (rate(thanos_ruler_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m]))
+        sum by (job) (rate(thanos_rule_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m]))
       /
-        sum by (job) (rate(thanos_ruler_alertmanagers_dns_lookups_total{job=~"thanos-rule.*"}[5m]))
+        sum by (job) (rate(thanos_rule_alertmanagers_dns_lookups_total{job=~"thanos-rule.*"}[5m]))
       * 100 > 1
       )
     for: 15m
@@ -1144,7 +1144,7 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "sum(rate(thanos_querier_store_apis_dns_lookups_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)",
+          "expr": "sum(rate(thanos_query_store_apis_dns_lookups_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)",
           "format": "time_series",
           "intervalFactor": 2,
           "legendFormat": "lookups {{job}}",
@@ -1223,7 +1223,7 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "sum(rate(thanos_querier_store_apis_dns_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_querier_store_apis_dns_lookups_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))",
+          "expr": "sum(rate(thanos_query_store_apis_dns_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_query_store_apis_dns_lookups_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))",
           "format": "time_series",
           "intervalFactor": 2,
           "legendFormat": "error",
@@ -17,11 +17,11 @@ groups:
     record: :grpc_client_failures_per_stream:sum_rate
   - expr: |
       (
-        sum(rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
+        sum(rate(thanos_query_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
       /
-        sum(rate(thanos_querier_store_apis_dns_lookups_total{job=~"thanos-query.*"}[5m]))
+        sum(rate(thanos_query_store_apis_dns_lookups_total{job=~"thanos-query.*"}[5m]))
       )
-    record: :thanos_querier_store_apis_dns_failures_per_lookup:sum_rate
+    record: :thanos_query_store_apis_dns_failures_per_lookup:sum_rate
   - expr: |
       histogram_quantile(0.99,
         sum(rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m])) by (le)
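This hunk, like the other Thanos changes in this regeneration, appears to track the rename of the Thanos component metric prefixes (`thanos_querier_*`, `thanos_ruler_*`, `thanos_compactor_*` becoming `thanos_query_*`, `thanos_rule_*`, `thanos_compact_*`): both the recording rule's expression and its `record` name change, while the ratio it records (store-API DNS failures per lookup for the query component) is unchanged. A consumer that has to work while both metric names may still be present could fall back with `or`; this is a hypothetical migration query sketched from the hunk above, not part of the mixin:

    # Prefer the new metric name, fall back to the old one if only it exists.
    sum(rate(thanos_query_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
      or
    sum(rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))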
@@ -82,29 +82,6 @@ labels:
   severity: warning
 {{< /code >}}
 
-##### PrometheusErrorSendingAlertsToAnyAlertmanager
-'{{ printf "%.1f" $value }}% minimum errors while sending alerts from
-Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
-
-{{< code lang="yaml" >}}
-alert: PrometheusErrorSendingAlertsToAnyAlertmanager
-annotations:
-  description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from
-    Prometheus {{$labels.instance}} to any Alertmanager.'
-  summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
-expr: |
-  min without(alertmanager) (
-    rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
-  /
-    rate(prometheus_notifications_sent_total{job="prometheus"}[5m])
-  )
-  * 100
-  > 3
-for: 15m
-labels:
-  severity: critical
-{{< /code >}}
-
 ##### PrometheusNotConnectedToAlertmanagers
 
 {{< code lang="yaml" >}}
@@ -320,6 +297,29 @@ labels:
   severity: warning
 {{< /code >}}
 
+##### PrometheusErrorSendingAlertsToAnyAlertmanager
+'{{ printf "%.1f" $value }}% minimum errors while sending alerts from
+Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
+
+{{< code lang="yaml" >}}
+alert: PrometheusErrorSendingAlertsToAnyAlertmanager
+annotations:
+  description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from
+    Prometheus {{$labels.instance}} to any Alertmanager.'
+  summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
+expr: |
+  min without (alertmanager) (
+    rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
+  /
+    rate(prometheus_notifications_sent_total{job="prometheus"}[5m])
+  )
+  * 100
+  > 3
+for: 15m
+labels:
+  severity: critical
+{{< /code >}}
+
 ## Dashboards
 Following dashboards are generated from mixins and hosted on github:
 
@@ -39,7 +39,7 @@ alert: ThanosCompactHalted
 annotations:
   description: Thanos Compact {{$labels.job}} has failed to run and now is halted.
   summary: Thanos Compact has failed to run ans is now halted.
-expr: thanos_compactor_halted{job=~"thanos-compact.*"} == 1
+expr: thanos_compact_halted{job=~"thanos-compact.*"} == 1
 for: 5m
 labels:
   severity: warning
@@ -187,9 +187,9 @@ annotations:
   summary: Thanos Query is having high number of DNS failures.
 expr: |
   (
-    sum by (job) (rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
+    sum by (job) (rate(thanos_query_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
   /
-    sum by (job) (rate(thanos_querier_store_apis_dns_lookups_total{job=~"thanos-query.*"}[5m]))
+    sum by (job) (rate(thanos_query_store_apis_dns_lookups_total{job=~"thanos-query.*"}[5m]))
   ) * 100 > 1
 for: 15m
 labels:
@@ -618,9 +618,9 @@ annotations:
   summary: Thanos Rule is having high number of DNS failures.
 expr: |
   (
-    sum by (job) (rate(thanos_ruler_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m]))
+    sum by (job) (rate(thanos_rule_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m]))
   /
-    sum by (job) (rate(thanos_ruler_query_apis_dns_lookups_total{job=~"thanos-rule.*"}[5m]))
+    sum by (job) (rate(thanos_rule_query_apis_dns_lookups_total{job=~"thanos-rule.*"}[5m]))
   * 100 > 1
   )
 for: 15m
@@ -638,9 +638,9 @@ annotations:
   summary: Thanos Rule is having high number of DNS failures.
 expr: |
   (
-    sum by (job) (rate(thanos_ruler_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m]))
+    sum by (job) (rate(thanos_rule_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m]))
   /
-    sum by (job) (rate(thanos_ruler_alertmanagers_dns_lookups_total{job=~"thanos-rule.*"}[5m]))
+    sum by (job) (rate(thanos_rule_alertmanagers_dns_lookups_total{job=~"thanos-rule.*"}[5m]))
   * 100 > 1
   )
 for: 15m
@@ -868,16 +868,16 @@ expr: |
 record: :grpc_client_failures_per_stream:sum_rate
 {{< /code >}}
 
-##### :thanos_querier_store_apis_dns_failures_per_lookup:sum_rate
+##### :thanos_query_store_apis_dns_failures_per_lookup:sum_rate
 
 {{< code lang="yaml" >}}
 expr: |
   (
-    sum(rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
+    sum(rate(thanos_query_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
   /
-    sum(rate(thanos_querier_store_apis_dns_lookups_total{job=~"thanos-query.*"}[5m]))
+    sum(rate(thanos_query_store_apis_dns_lookups_total{job=~"thanos-query.*"}[5m]))
   )
-record: :thanos_querier_store_apis_dns_failures_per_lookup:sum_rate
+record: :thanos_query_store_apis_dns_failures_per_lookup:sum_rate
 {{< /code >}}
 
 ##### :query_duration_seconds:histogram_quantile