mirror of https://github.com/monitoring-mixins/website.git synced 2024-12-14 11:37:31 +00:00

assets,site/content: regenerate

paulfantom 2020-08-13 12:50:10 +02:00
parent df43594957
commit 7fd2bee5a7
No known key found for this signature in database
GPG key ID: 12AE0185401674E7
25 changed files with 1134 additions and 535 deletions


@@ -27,7 +27,8 @@ groups:
   rules:
   - alert: CephMdsMissingReplicas
     annotations:
-      description: Minimum required replicas for storage metadata service not available. Might affect the working of storage cluster.
+      description: Minimum required replicas for storage metadata service not available.
+        Might affect the working of storage cluster.
       message: Insufficient replicas for storage metadata service.
       severity_level: warning
       storage_type: ceph
@@ -51,7 +52,8 @@ groups:
       severity: critical
   - alert: CephMonHighNumberOfLeaderChanges
     annotations:
-      description: Ceph Monitor {{ $labels.ceph_daemon }} on host {{ $labels.hostname }} has seen {{ $value | printf "%.2f" }} leader changes per minute recently.
+      description: Ceph Monitor {{ $labels.ceph_daemon }} on host {{ $labels.hostname
+        }} has seen {{ $value | printf "%.2f" }} leader changes per minute recently.
       message: Storage Cluster has seen many leader changes recently.
       severity_level: warning
       storage_type: ceph
@@ -64,7 +66,8 @@ groups:
   rules:
   - alert: CephNodeDown
     annotations:
-      description: Storage node {{ $labels.node }} went down. Please check the node immediately.
+      description: Storage node {{ $labels.node }} went down. Please check the node
+        immediately.
       message: Storage node {{ $labels.node }} went down
       severity_level: error
       storage_type: ceph
@@ -77,7 +80,9 @@ groups:
   rules:
   - alert: CephOSDCriticallyFull
     annotations:
-      description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has crossed 85% on host {{ $labels.hostname }}. Immediately free up some space or expand the storage cluster or contact support.
+      description: Utilization of back-end storage device {{ $labels.ceph_daemon }}
+        has crossed 85% on host {{ $labels.hostname }}. Immediately free up some space
+        or expand the storage cluster or contact support.
       message: Back-end storage device is critically full.
       severity_level: error
       storage_type: ceph
@@ -88,7 +93,9 @@ groups:
       severity: critical
   - alert: CephOSDNearFull
     annotations:
-      description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has crossed 75% on host {{ $labels.hostname }}. Free up some space or expand the storage cluster or contact support.
+      description: Utilization of back-end storage device {{ $labels.ceph_daemon }}
+        has crossed 75% on host {{ $labels.hostname }}. Free up some space or expand
+        the storage cluster or contact support.
       message: Back-end storage device is nearing full.
       severity_level: warning
       storage_type: ceph
@@ -99,7 +106,8 @@ groups:
       severity: warning
   - alert: CephOSDDiskNotResponding
     annotations:
-      description: Disk device {{ $labels.device }} not responding, on host {{ $labels.host }}.
+      description: Disk device {{ $labels.device }} not responding, on host {{ $labels.host
+        }}.
       message: Disk not responding
       severity_level: error
       storage_type: ceph
@@ -110,7 +118,8 @@ groups:
       severity: critical
   - alert: CephOSDDiskUnavailable
     annotations:
-      description: Disk device {{ $labels.device }} not accessible on host {{ $labels.host }}.
+      description: Disk device {{ $labels.device }} not accessible on host {{ $labels.host
+        }}.
       message: Disk not accessible
       severity_level: error
       storage_type: ceph
@@ -145,8 +154,10 @@ groups:
   rules:
   - alert: PersistentVolumeUsageNearFull
     annotations:
-      description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 75%. Free up some space or expand the PVC.
-      message: PVC {{ $labels.persistentvolumeclaim }} is nearing full. Data deletion or PVC expansion is required.
+      description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed
+        75%. Free up some space or expand the PVC.
+      message: PVC {{ $labels.persistentvolumeclaim }} is nearing full. Data deletion
+        or PVC expansion is required.
       severity_level: warning
       storage_type: ceph
     expr: |
@@ -156,8 +167,10 @@ groups:
       severity: warning
   - alert: PersistentVolumeUsageCritical
     annotations:
-      description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 85%. Free up some space or expand the PVC immediately.
-      message: PVC {{ $labels.persistentvolumeclaim }} is critically full. Data deletion or PVC expansion is required.
+      description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed
+        85%. Free up some space or expand the PVC immediately.
+      message: PVC {{ $labels.persistentvolumeclaim }} is critically full. Data deletion
+        or PVC expansion is required.
       severity_level: error
       storage_type: ceph
     expr: |
@@ -191,7 +204,8 @@ groups:
       severity: warning
   - alert: CephOSDVersionMismatch
     annotations:
-      description: There are {{ $value }} different versions of Ceph OSD components running.
+      description: There are {{ $value }} different versions of Ceph OSD components
+        running.
       message: There are multiple versions of storage services running.
       severity_level: warning
       storage_type: ceph
@@ -202,7 +216,8 @@ groups:
       severity: warning
   - alert: CephMonVersionMismatch
     annotations:
-      description: There are {{ $value }} different versions of Ceph Mon components running.
+      description: There are {{ $value }} different versions of Ceph Mon components
+        running.
       message: There are multiple versions of storage services running.
       severity_level: warning
       storage_type: ceph
@@ -215,8 +230,10 @@ groups:
   rules:
   - alert: CephClusterNearFull
     annotations:
-      description: Storage cluster utilization has crossed 75% and will become read-only at 85%. Free up some space or expand the storage cluster.
-      message: Storage cluster is nearing full. Data deletion or cluster expansion is required.
+      description: Storage cluster utilization has crossed 75% and will become read-only
+        at 85%. Free up some space or expand the storage cluster.
+      message: Storage cluster is nearing full. Data deletion or cluster expansion
+        is required.
       severity_level: warning
       storage_type: ceph
     expr: |
@@ -226,8 +243,10 @@ groups:
      severity: warning
   - alert: CephClusterCriticallyFull
     annotations:
-      description: Storage cluster utilization has crossed 80% and will become read-only at 85%. Free up some space or expand the storage cluster immediately.
-      message: Storage cluster is critically full and needs immediate data deletion or cluster expansion.
+      description: Storage cluster utilization has crossed 80% and will become read-only
+        at 85%. Free up some space or expand the storage cluster immediately.
+      message: Storage cluster is critically full and needs immediate data deletion
+        or cluster expansion.
       severity_level: error
       storage_type: ceph
     expr: |
@@ -237,8 +256,10 @@ groups:
       severity: critical
   - alert: CephClusterReadOnly
     annotations:
-      description: Storage cluster utilization has crossed 85% and will become read-only now. Free up some space or expand the storage cluster immediately.
-      message: Storage cluster is read-only now and needs immediate data deletion or cluster expansion.
+      description: Storage cluster utilization has crossed 85% and will become read-only
+        now. Free up some space or expand the storage cluster immediately.
+      message: Storage cluster is read-only now and needs immediate data deletion
+        or cluster expansion.
       severity_level: error
       storage_type: ceph
     expr: |
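The re-wrapped description and message values in the diff above are plain YAML scalar folding: a continuation line indented deeper than its key is joined to the previous line with a single space, so the regenerated file parses back to exactly the same annotation strings as before. A minimal, hypothetical rule illustrating the equivalence (the alert name and wording are invented for illustration, not taken from these files):

groups:
- name: example-folding
  rules:
  - alert: ExampleDeviceNearFull
    annotations:
      # Single-line form:
      message: Back-end storage device is nearing full.
      # Folded form: YAML reads the two lines below back as the one string
      # "Utilization of the example device has crossed 75%. Free up some space."
      description: Utilization of the example device has crossed 75%.
        Free up some space.
    expr: vector(1)
    labels:
      severity: warning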


@@ -12,7 +12,8 @@ groups:
       severity: critical
   - alert: CoreDNSLatencyHigh
     annotations:
-      message: CoreDNS has 99th percentile latency of {{ $value }} seconds for server {{ $labels.server }} zone {{ $labels.zone }} .
+      message: CoreDNS has 99th percentile latency of {{ $value }} seconds for server
+        {{ $labels.server }} zone {{ $labels.zone }} .
       runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednslatencyhigh
     expr: |
       histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job="kube-dns"}[5m])) by(server, zone, le)) > 4
@@ -21,7 +22,8 @@ groups:
       severity: critical
   - alert: CoreDNSErrorsHigh
     annotations:
-      message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests.
+      message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
+        of requests.
       runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
     expr: |
       sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@@ -32,7 +34,8 @@ groups:
       severity: critical
   - alert: CoreDNSErrorsHigh
     annotations:
-      message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests.
+      message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
+        of requests.
       runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
     expr: |
       sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@@ -45,7 +48,8 @@ groups:
   rules:
   - alert: CoreDNSForwardLatencyHigh
     annotations:
-      message: CoreDNS has 99th percentile latency of {{ $value }} seconds forwarding requests to {{ $labels.to }}.
+      message: CoreDNS has 99th percentile latency of {{ $value }} seconds forwarding
+        requests to {{ $labels.to }}.
       runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardlatencyhigh
     expr: |
       histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{job="kube-dns"}[5m])) by(to, le)) > 4
@@ -54,7 +58,8 @@ groups:
       severity: critical
   - alert: CoreDNSForwardErrorsHigh
     annotations:
-      message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of forward requests to {{ $labels.to }}.
+      message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
+        of forward requests to {{ $labels.to }}.
       runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
     expr: |
       sum(rate(coredns_forward_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@@ -65,7 +70,8 @@ groups:
       severity: critical
   - alert: CoreDNSForwardErrorsHigh
     annotations:
-      message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of forward requests to {{ $labels.to }}.
+      message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
+        of forward requests to {{ $labels.to }}.
       runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
     expr: |
       sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))


@@ -107,7 +107,8 @@ groups:
       severity: warning
   - alert: CortexIngesterRestarts
     annotations:
-      message: '{{ $labels.job }}/{{ $labels.instance }} has restarted {{ printf "%.2f" $value }} times in the last 30 mins.'
+      message: '{{ $labels.job }}/{{ $labels.instance }} has restarted {{ printf "%.2f"
+        $value }} times in the last 30 mins.'
     expr: |
       changes(process_start_time_seconds{job=~".+(cortex|ingester)"}[30m]) > 1
     labels:
@@ -278,7 +279,8 @@ groups:
   rules:
   - alert: CortexGossipMembersMismatch
     annotations:
-      message: '{{ $labels.job }}/{{ $labels.instance }} sees incorrect number of gossip members.'
+      message: '{{ $labels.job }}/{{ $labels.instance }} sees incorrect number of
+        gossip members.'
     expr: |
       memberlist_client_cluster_members_count
         != on (cluster, namespace) group_left
@@ -290,7 +292,8 @@ groups:
   rules:
   - alert: CortexIngesterHasNotShippedBlocks
     annotations:
-      message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} has not shipped any block in the last 4 hours.
+      message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} has
+        not shipped any block in the last 4 hours.
     expr: |
       (min by(namespace, instance) (time() - thanos_objstore_bucket_last_successful_upload_time{job=~".+/ingester"}) > 60 * 60 * 4)
       and
@@ -302,7 +305,8 @@ groups:
       severity: critical
   - alert: CortexIngesterHasNotShippedBlocksSinceStart
     annotations:
-      message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} has not shipped any block in the last 4 hours.
+      message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} has
+        not shipped any block in the last 4 hours.
     expr: |
       (max by(namespace, instance) (thanos_objstore_bucket_last_successful_upload_time{job=~".+/ingester"}) == 0)
       and
@@ -312,7 +316,8 @@ groups:
       severity: critical
   - alert: CortexIngesterTSDBHeadCompactionFailed
     annotations:
-      message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} is failing to compact TSDB head.
+      message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} is failing
+        to compact TSDB head.
     expr: |
       rate(cortex_ingester_tsdb_compactions_failed_total[5m]) > 0
     for: 15m
@@ -320,7 +325,8 @@ groups:
       severity: critical
   - alert: CortexQuerierHasNotScanTheBucket
     annotations:
-      message: Cortex Querier {{ $labels.namespace }}/{{ $labels.instance }} has not successfully scanned the bucket since {{ $value | humanizeDuration }}.
+      message: Cortex Querier {{ $labels.namespace }}/{{ $labels.instance }} has not
+        successfully scanned the bucket since {{ $value | humanizeDuration }}.
     expr: |
       (time() - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 60 * 30)
       and
@@ -330,7 +336,9 @@ groups:
       severity: critical
   - alert: CortexQuerierHighRefetchRate
     annotations:
-      message: Cortex Queries in {{ $labels.namespace }} are refetching series from different store-gateways (because of missing blocks) for the {{ printf "%.0f" $value }}% of queries.
+      message: Cortex Queries in {{ $labels.namespace }} are refetching series from
+        different store-gateways (because of missing blocks) for the {{ printf "%.0f"
+        $value }}% of queries.
     expr: |
       100 * (
         (
@@ -347,7 +355,9 @@ groups:
       severity: warning
   - alert: CortexStoreGatewayHasNotSyncTheBucket
     annotations:
-      message: Cortex Store Gateway {{ $labels.namespace }}/{{ $labels.instance }} has not successfully synched the bucket since {{ $value | humanizeDuration }}.
+      message: Cortex Store Gateway {{ $labels.namespace }}/{{ $labels.instance }}
+        has not successfully synched the bucket since {{ $value | humanizeDuration
+        }}.
     expr: |
       (time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 60 * 30)
       and
@@ -359,7 +369,8 @@ groups:
   rules:
   - alert: CortexCompactorHasNotSuccessfullyCleanedUpBlocks
     annotations:
-      message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has not successfully cleaned up blocks in the last 24 hours.
+      message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has
+        not successfully cleaned up blocks in the last 24 hours.
     expr: |
       (time() - cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds > 60 * 60 * 24)
       and
@@ -369,7 +380,8 @@ groups:
       severity: critical
   - alert: CortexCompactorHasNotSuccessfullyCleanedUpBlocksSinceStart
     annotations:
-      message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has not successfully cleaned up blocks in the last 24 hours.
+      message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has
+        not successfully cleaned up blocks in the last 24 hours.
     expr: |
       cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds == 0
     for: 24h
@@ -377,7 +389,8 @@ groups:
       severity: critical
   - alert: CortexCompactorHasNotUploadedBlocks
     annotations:
-      message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has not uploaded any block in the last 24 hours.
+      message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has
+        not uploaded any block in the last 24 hours.
     expr: |
       (time() - thanos_objstore_bucket_last_successful_upload_time{job=~".+/compactor"} > 60 * 60 * 24)
       and
@@ -387,7 +400,8 @@ groups:
       severity: critical
   - alert: CortexCompactorHasNotUploadedBlocksSinceStart
     annotations:
-      message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has not uploaded any block in the last 24 hours.
+      message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has
+        not uploaded any block in the last 24 hours.
     expr: |
       thanos_objstore_bucket_last_successful_upload_time{job=~".+/compactor"} == 0
     for: 24h


@@ -1,11 +1,14 @@
 groups:
 - name: cortex_api
   rules:
-  - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_request_duration_seconds_count[1m]))
+      by (cluster, job)
     record: cluster_job:cortex_request_duration_seconds:avg
   - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job)
     record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate
@@ -13,185 +16,279 @@ groups:
     record: cluster_job:cortex_request_duration_seconds_sum:sum_rate
   - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job)
     record: cluster_job:cortex_request_duration_seconds_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, route))
     record: cluster_job_route:cortex_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, route))
     record: cluster_job_route:cortex_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route)
+  - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route)
+      / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route)
     record: cluster_job_route:cortex_request_duration_seconds:avg
-  - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)
+  - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job,
+      route)
     record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate
   - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route)
     record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate
   - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route)
     record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m]))
+      by (le, cluster, namespace, job, route))
     record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m]))
+      by (le, cluster, namespace, job, route))
     record: cluster_namespace_job_route:cortex_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)
+  - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace,
+      job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster,
+      namespace, job, route)
     record: cluster_namespace_job_route:cortex_request_duration_seconds:avg
-  - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)
+  - expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace,
+      job, route)
     record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate
-  - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route)
+  - expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace,
+      job, route)
     record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate
-  - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)
+  - expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace,
+      job, route)
     record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate
 - name: cortex_cache
   rules:
-  - expr: histogram_quantile(0.99, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, method))
     record: cluster_job_method:cortex_memcache_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, method))
     record: cluster_job_method:cortex_memcache_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster, job, method) / sum(rate(cortex_memcache_request_duration_seconds_count[1m])) by (cluster, job, method)
+  - expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster,
+      job, method) / sum(rate(cortex_memcache_request_duration_seconds_count[1m]))
+      by (cluster, job, method)
     record: cluster_job_method:cortex_memcache_request_duration_seconds:avg
-  - expr: sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)
+  - expr: sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster,
+      job, method)
     record: cluster_job_method:cortex_memcache_request_duration_seconds_bucket:sum_rate
-  - expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster, job, method)
+  - expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster,
+      job, method)
     record: cluster_job_method:cortex_memcache_request_duration_seconds_sum:sum_rate
-  - expr: sum(rate(cortex_memcache_request_duration_seconds_count[1m])) by (cluster, job, method)
+  - expr: sum(rate(cortex_memcache_request_duration_seconds_count[1m])) by (cluster,
+      job, method)
     record: cluster_job_method:cortex_memcache_request_duration_seconds_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_cache_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_cache_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job)
+      / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job)
     record: cluster_job:cortex_cache_request_duration_seconds:avg
-  - expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job)
+  - expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster,
+      job)
     record: cluster_job:cortex_cache_request_duration_seconds_bucket:sum_rate
   - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job)
     record: cluster_job:cortex_cache_request_duration_seconds_sum:sum_rate
-  - expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster,
+      job)
     record: cluster_job:cortex_cache_request_duration_seconds_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, method))
     record: cluster_job_method:cortex_cache_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, method))
     record: cluster_job_method:cortex_cache_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job, method) / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job, method)
+  - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job,
+      method) / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster,
+      job, method)
     record: cluster_job_method:cortex_cache_request_duration_seconds:avg
-  - expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)
+  - expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster,
+      job, method)
     record: cluster_job_method:cortex_cache_request_duration_seconds_bucket:sum_rate
-  - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job, method)
+  - expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job,
+      method)
     record: cluster_job_method:cortex_cache_request_duration_seconds_sum:sum_rate
-  - expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job, method)
+  - expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster,
+      job, method)
     record: cluster_job_method:cortex_cache_request_duration_seconds_count:sum_rate
 - name: cortex_storage
   rules:
-  - expr: histogram_quantile(0.99, sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, operation))
     record: cluster_job_operation:cortex_bigtable_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, operation))
     record: cluster_job_operation:cortex_bigtable_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_bigtable_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_bigtable_request_duration_seconds_count[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_bigtable_request_duration_seconds_sum[1m])) by (cluster,
+      job, operation) / sum(rate(cortex_bigtable_request_duration_seconds_count[1m]))
+      by (cluster, job, operation)
     record: cluster_job_operation:cortex_bigtable_request_duration_seconds:avg
-  - expr: sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)
+  - expr: sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster,
+      job, operation)
     record: cluster_job_operation:cortex_bigtable_request_duration_seconds_bucket:sum_rate
-  - expr: sum(rate(cortex_bigtable_request_duration_seconds_sum[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_bigtable_request_duration_seconds_sum[1m])) by (cluster,
+      job, operation)
     record: cluster_job_operation:cortex_bigtable_request_duration_seconds_sum:sum_rate
-  - expr: sum(rate(cortex_bigtable_request_duration_seconds_count[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_bigtable_request_duration_seconds_count[1m])) by (cluster,
+      job, operation)
     record: cluster_job_operation:cortex_bigtable_request_duration_seconds_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, operation))
     record: cluster_job_operation:cortex_cassandra_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, operation))
     record: cluster_job_operation:cortex_cassandra_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_cassandra_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_cassandra_request_duration_seconds_count[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_cassandra_request_duration_seconds_sum[1m])) by (cluster,
+      job, operation) / sum(rate(cortex_cassandra_request_duration_seconds_count[1m]))
+      by (cluster, job, operation)
     record: cluster_job_operation:cortex_cassandra_request_duration_seconds:avg
-  - expr: sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)
+  - expr: sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le,
+      cluster, job, operation)
     record: cluster_job_operation:cortex_cassandra_request_duration_seconds_bucket:sum_rate
-  - expr: sum(rate(cortex_cassandra_request_duration_seconds_sum[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_cassandra_request_duration_seconds_sum[1m])) by (cluster,
+      job, operation)
     record: cluster_job_operation:cortex_cassandra_request_duration_seconds_sum:sum_rate
-  - expr: sum(rate(cortex_cassandra_request_duration_seconds_count[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_cassandra_request_duration_seconds_count[1m])) by (cluster,
+      job, operation)
     record: cluster_job_operation:cortex_cassandra_request_duration_seconds_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, operation))
     record: cluster_job_operation:cortex_dynamo_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, operation))
     record: cluster_job_operation:cortex_dynamo_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_dynamo_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_dynamo_request_duration_seconds_count[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_dynamo_request_duration_seconds_sum[1m])) by (cluster, job,
+      operation) / sum(rate(cortex_dynamo_request_duration_seconds_count[1m])) by
+      (cluster, job, operation)
     record: cluster_job_operation:cortex_dynamo_request_duration_seconds:avg
-  - expr: sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)
+  - expr: sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster,
+      job, operation)
     record: cluster_job_operation:cortex_dynamo_request_duration_seconds_bucket:sum_rate
-  - expr: sum(rate(cortex_dynamo_request_duration_seconds_sum[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_dynamo_request_duration_seconds_sum[1m])) by (cluster, job,
+      operation)
     record: cluster_job_operation:cortex_dynamo_request_duration_seconds_sum:sum_rate
-  - expr: sum(rate(cortex_dynamo_request_duration_seconds_count[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_dynamo_request_duration_seconds_count[1m])) by (cluster,
+      job, operation)
     record: cluster_job_operation:cortex_dynamo_request_duration_seconds_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_chunk_store_index_lookups_per_query:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_chunk_store_index_lookups_per_query:50quantile
-  - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_index_lookups_per_query_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_sum[1m])) by (cluster,
+      job) / sum(rate(cortex_chunk_store_index_lookups_per_query_count[1m])) by (cluster,
+      job)
     record: cluster_job:cortex_chunk_store_index_lookups_per_query:avg
-  - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le, cluster, job)
+  - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le,
+      cluster, job)
     record: cluster_job:cortex_chunk_store_index_lookups_per_query_bucket:sum_rate
-  - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_sum[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_sum[1m])) by (cluster,
+      job)
     record: cluster_job:cortex_chunk_store_index_lookups_per_query_sum:sum_rate
-  - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_chunk_store_index_lookups_per_query_count[1m])) by (cluster,
+      job)
     record: cluster_job:cortex_chunk_store_index_lookups_per_query_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query:50quantile
-  - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_series_pre_intersection_per_query_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_sum[1m]))
+      by (cluster, job) / sum(rate(cortex_chunk_store_series_pre_intersection_per_query_count[1m]))
+      by (cluster, job)
     record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query:avg
-  - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m])) by (le, cluster, job)
+  - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m]))
+      by (le, cluster, job)
     record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query_bucket:sum_rate
-  - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_sum[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_sum[1m]))
+      by (cluster, job)
     record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query_sum:sum_rate
-  - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_count[1m]))
+      by (cluster, job)
     record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_chunk_store_series_post_intersection_per_query:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_chunk_store_series_post_intersection_per_query:50quantile
-  - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_series_post_intersection_per_query_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_sum[1m]))
+      by (cluster, job) / sum(rate(cortex_chunk_store_series_post_intersection_per_query_count[1m]))
+      by (cluster, job)
     record: cluster_job:cortex_chunk_store_series_post_intersection_per_query:avg
-  - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m])) by (le, cluster, job)
+  - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m]))
+      by (le, cluster, job)
     record: cluster_job:cortex_chunk_store_series_post_intersection_per_query_bucket:sum_rate
-  - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_sum[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_sum[1m]))
+      by (cluster, job)
     record: cluster_job:cortex_chunk_store_series_post_intersection_per_query_sum:sum_rate
-  - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_count[1m]))
+      by (cluster, job)
     record: cluster_job:cortex_chunk_store_series_post_intersection_per_query_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_chunk_store_chunks_per_query:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_chunk_store_chunks_per_query:50quantile
-  - expr: sum(rate(cortex_chunk_store_chunks_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_chunks_per_query_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_chunk_store_chunks_per_query_sum[1m])) by (cluster, job)
+      / sum(rate(cortex_chunk_store_chunks_per_query_count[1m])) by (cluster, job)
     record: cluster_job:cortex_chunk_store_chunks_per_query:avg
-  - expr: sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster, job)
+  - expr: sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster,
+      job)
     record: cluster_job:cortex_chunk_store_chunks_per_query_bucket:sum_rate
   - expr: sum(rate(cortex_chunk_store_chunks_per_query_sum[1m])) by (cluster, job)
     record: cluster_job:cortex_chunk_store_chunks_per_query_sum:sum_rate
   - expr: sum(rate(cortex_chunk_store_chunks_per_query_count[1m])) by (cluster, job)
     record: cluster_job:cortex_chunk_store_chunks_per_query_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_database_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, method))
     record: cluster_job_method:cortex_database_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_database_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, method))
     record: cluster_job_method:cortex_database_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_database_request_duration_seconds_sum[1m])) by (cluster, job, method) / sum(rate(cortex_database_request_duration_seconds_count[1m])) by (cluster, job, method)
+  - expr: sum(rate(cortex_database_request_duration_seconds_sum[1m])) by (cluster,
+      job, method) / sum(rate(cortex_database_request_duration_seconds_count[1m]))
+      by (cluster, job, method)
     record: cluster_job_method:cortex_database_request_duration_seconds:avg
-  - expr: sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)
+  - expr: sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster,
+      job, method)
     record: cluster_job_method:cortex_database_request_duration_seconds_bucket:sum_rate
-  - expr: sum(rate(cortex_database_request_duration_seconds_sum[1m])) by (cluster, job, method)
+  - expr: sum(rate(cortex_database_request_duration_seconds_sum[1m])) by (cluster,
+      job, method)
    record: cluster_job_method:cortex_database_request_duration_seconds_sum:sum_rate
-  - expr: sum(rate(cortex_database_request_duration_seconds_count[1m])) by (cluster, job, method)
+  - expr: sum(rate(cortex_database_request_duration_seconds_count[1m])) by (cluster,
+      job, method)
     record: cluster_job_method:cortex_database_request_duration_seconds_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_gcs_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, operation))
     record: cluster_job_operation:cortex_gcs_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_gcs_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job, operation))
     record: cluster_job_operation:cortex_gcs_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_gcs_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_gcs_request_duration_seconds_count[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_gcs_request_duration_seconds_sum[1m])) by (cluster, job,
+      operation) / sum(rate(cortex_gcs_request_duration_seconds_count[1m])) by (cluster,
+      job, operation)
     record: cluster_job_operation:cortex_gcs_request_duration_seconds:avg
-  - expr: sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)
+  - expr: sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster,
+      job, operation)
     record: cluster_job_operation:cortex_gcs_request_duration_seconds_bucket:sum_rate
-  - expr: sum(rate(cortex_gcs_request_duration_seconds_sum[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_gcs_request_duration_seconds_sum[1m])) by (cluster, job,
+      operation)
     record: cluster_job_operation:cortex_gcs_request_duration_seconds_sum:sum_rate
-  - expr: sum(rate(cortex_gcs_request_duration_seconds_count[1m])) by (cluster, job, operation)
+  - expr: sum(rate(cortex_gcs_request_duration_seconds_count[1m])) by (cluster, job,
+      operation)
     record: cluster_job_operation:cortex_gcs_request_duration_seconds_count:sum_rate
-  - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_kv_request_duration_seconds:99quantile
-  - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job))
+  - expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m]))
+      by (le, cluster, job))
     record: cluster_job:cortex_kv_request_duration_seconds:50quantile
-  - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job)
+  - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job)
+      / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job)
     record: cluster_job:cortex_kv_request_duration_seconds:avg
-  - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job)
+  - expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster,
+      job)
     record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate
   - expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job)
     record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate
@ -199,11 +296,14 @@ groups:
record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate
- name: cortex_queries - name: cortex_queries
rules: rules:
- expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job)) - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_query_frontend_retries:99quantile record: cluster_job:cortex_query_frontend_retries:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job)) - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_query_frontend_retries:50quantile record: cluster_job:cortex_query_frontend_retries:50quantile
- expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) - expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m]))
by (cluster, job)
record: cluster_job:cortex_query_frontend_retries:avg record: cluster_job:cortex_query_frontend_retries:avg
- expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job) - expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job)
record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate
@ -211,23 +311,33 @@ groups:
record: cluster_job:cortex_query_frontend_retries_sum:sum_rate record: cluster_job:cortex_query_frontend_retries_sum:sum_rate
- expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job) - expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job)
record: cluster_job:cortex_query_frontend_retries_count:sum_rate record: cluster_job:cortex_query_frontend_retries_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job)) - expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job)) - expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, job) - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster,
job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by
(cluster, job)
record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job) - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le,
cluster, job)
record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, job) - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster,
job)
record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, job) - expr: sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster,
job)
record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job)) - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_series:99quantile record: cluster_job:cortex_ingester_queried_series:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job)) - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_series:50quantile record: cluster_job:cortex_ingester_queried_series:50quantile
- expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_series_count[1m]))
by (cluster, job)
record: cluster_job:cortex_ingester_queried_series:avg record: cluster_job:cortex_ingester_queried_series:avg
- expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job) - expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job)
record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate
@ -235,11 +345,14 @@ groups:
record: cluster_job:cortex_ingester_queried_series_sum:sum_rate record: cluster_job:cortex_ingester_queried_series_sum:sum_rate
- expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job) - expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job)
record: cluster_job:cortex_ingester_queried_series_count:sum_rate record: cluster_job:cortex_ingester_queried_series_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_chunks_bucket[1m])) by (le, cluster, job)) - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_chunks_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_chunks:99quantile record: cluster_job:cortex_ingester_queried_chunks:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_chunks_bucket[1m])) by (le, cluster, job)) - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_chunks_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_chunks:50quantile record: cluster_job:cortex_ingester_queried_chunks:50quantile
- expr: sum(rate(cortex_ingester_queried_chunks_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_chunks_count[1m])) by (cluster, job) - expr: sum(rate(cortex_ingester_queried_chunks_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_chunks_count[1m]))
by (cluster, job)
record: cluster_job:cortex_ingester_queried_chunks:avg record: cluster_job:cortex_ingester_queried_chunks:avg
- expr: sum(rate(cortex_ingester_queried_chunks_bucket[1m])) by (le, cluster, job) - expr: sum(rate(cortex_ingester_queried_chunks_bucket[1m])) by (le, cluster, job)
record: cluster_job:cortex_ingester_queried_chunks_bucket:sum_rate record: cluster_job:cortex_ingester_queried_chunks_bucket:sum_rate
@ -247,11 +360,14 @@ groups:
record: cluster_job:cortex_ingester_queried_chunks_sum:sum_rate record: cluster_job:cortex_ingester_queried_chunks_sum:sum_rate
- expr: sum(rate(cortex_ingester_queried_chunks_count[1m])) by (cluster, job) - expr: sum(rate(cortex_ingester_queried_chunks_count[1m])) by (cluster, job)
record: cluster_job:cortex_ingester_queried_chunks_count:sum_rate record: cluster_job:cortex_ingester_queried_chunks_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job)) - expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_samples:99quantile record: cluster_job:cortex_ingester_queried_samples:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job)) - expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_samples:50quantile record: cluster_job:cortex_ingester_queried_samples:50quantile
- expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job) - expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_samples_count[1m]))
by (cluster, job)
record: cluster_job:cortex_ingester_queried_samples:avg record: cluster_job:cortex_ingester_queried_samples:avg
- expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job) - expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job)
record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate
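Every histogram aggregated above follows the same five-rule family: a 99th and a 50th percentile computed with histogram_quantile over the per-bucket rate, an average built from the _sum and _count series, and plain sum_rate rules for the bucket, sum and count series, all over a 1m window and grouped by cluster and job (plus method or operation where the metric has one). A minimal hand-written sketch of that family, for a hypothetical histogram named my_request_duration_seconds (the metric name, group name and record names below are illustrative, not taken from the mixin):

groups:
  - name: example_histogram_aggregations
    rules:
      # p99 and p50 from the bucket counters; keeping le in the inner sum is
      # what lets histogram_quantile recover the percentile.
      - record: cluster_job:my_request_duration_seconds:99quantile
        expr: histogram_quantile(0.99, sum(rate(my_request_duration_seconds_bucket[1m])) by (le, cluster, job))
      - record: cluster_job:my_request_duration_seconds:50quantile
        expr: histogram_quantile(0.50, sum(rate(my_request_duration_seconds_bucket[1m])) by (le, cluster, job))
      # mean latency from _sum / _count over the same window and grouping
      - record: cluster_job:my_request_duration_seconds:avg
        expr: sum(rate(my_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(my_request_duration_seconds_count[1m])) by (cluster, job)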
View file
@ -18,7 +18,8 @@ groups:
severity: critical severity: critical
- alert: etcdInsufficientMembers - alert: etcdInsufficientMembers
annotations: annotations:
message: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).' message: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
}}).'
expr: | expr: |
sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) without (instance) + 1) / 2) sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) without (instance) + 1) / 2)
for: 3m for: 3m
@ -26,7 +27,8 @@ groups:
severity: critical severity: critical
- alert: etcdNoLeader - alert: etcdNoLeader
annotations: annotations:
message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.' message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has
no leader.'
expr: | expr: |
etcd_server_has_leader{job=~".*etcd.*"} == 0 etcd_server_has_leader{job=~".*etcd.*"} == 0
for: 1m for: 1m
@ -34,7 +36,9 @@ groups:
severity: critical severity: critical
- alert: etcdHighNumberOfLeaderChanges - alert: etcdHighNumberOfLeaderChanges
annotations: annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.' message: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within
the last 15 minutes. Frequent elections may be a sign of insufficient resources,
high network latency, or disruptions by other components and should be investigated.'
expr: | expr: |
increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4 increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4
for: 5m for: 5m
@ -42,7 +46,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests - alert: etcdHighNumberOfFailedGRPCRequests
annotations: annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.' message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{
$labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
expr: | expr: |
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) without (grpc_type, grpc_code) 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) without (grpc_type, grpc_code)
/ /
@ -53,7 +58,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests - alert: etcdHighNumberOfFailedGRPCRequests
annotations: annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.' message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{
$labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
expr: | expr: |
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) without (grpc_type, grpc_code) 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) without (grpc_type, grpc_code)
/ /
@ -64,7 +70,8 @@ groups:
severity: critical severity: critical
- alert: etcdGRPCRequestsSlow - alert: etcdGRPCRequestsSlow
annotations: annotations:
message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.' message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method
}} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: | expr: |
histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) without(grpc_type)) histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) without(grpc_type))
> 0.15 > 0.15
@ -73,7 +80,8 @@ groups:
severity: critical severity: critical
- alert: etcdMemberCommunicationSlow - alert: etcdMemberCommunicationSlow
annotations: annotations:
message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.' message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To
}} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: | expr: |
histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.15 > 0.15
@ -82,7 +90,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighNumberOfFailedProposals - alert: etcdHighNumberOfFailedProposals
annotations: annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.' message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within
the last 30 minutes on etcd instance {{ $labels.instance }}.'
expr: | expr: |
rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
for: 15m for: 15m
@ -90,7 +99,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighFsyncDurations - alert: etcdHighFsyncDurations
annotations: annotations:
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.' message: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are
{{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: | expr: |
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.5 > 0.5
@ -99,7 +109,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighCommitDurations - alert: etcdHighCommitDurations
annotations: annotations:
message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations are {{ $value }}s on etcd instance {{ $labels.instance }}.' message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations are
{{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: | expr: |
histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.25 > 0.25
@ -108,7 +119,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighNumberOfFailedHTTPRequests - alert: etcdHighNumberOfFailedHTTPRequests
annotations: annotations:
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}' message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd
instance {{ $labels.instance }}'
expr: | expr: |
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m])) sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
without (code) > 0.01 without (code) > 0.01
@ -117,7 +129,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighNumberOfFailedHTTPRequests - alert: etcdHighNumberOfFailedHTTPRequests
annotations: annotations:
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.' message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd
instance {{ $labels.instance }}.'
expr: | expr: |
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m])) sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
without (code) > 0.05 without (code) > 0.05
@ -126,7 +139,8 @@ groups:
severity: critical severity: critical
- alert: etcdHTTPRequestsSlow - alert: etcdHTTPRequestsSlow
annotations: annotations:
message: etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow. message: etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method
}} are slow.
expr: | expr: |
histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m])) histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
> 0.15 > 0.15
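The etcd alerts above can be exercised offline with promtool rule unit tests. The sketch below is an illustration only, not something shipped in this repository: the rule_files path and the job and instance values are placeholders, and it checks just etcdNoLeader, whose expr, for: 1m and severity: critical appear above.

rule_files:
  - etcd_alerts.yaml            # placeholder path to the alert rules shown above
evaluation_interval: 1m
tests:
  - interval: 1m
    input_series:
      # the member reports no leader for the whole window
      - series: 'etcd_server_has_leader{job="etcd",instance="10.0.0.1:2379"}'
        values: '0+0x10'
    alert_rule_test:
      - eval_time: 5m
        alertname: etcdNoLeader
        exp_alerts:
          - exp_labels:
              severity: critical
              job: etcd
              instance: 10.0.0.1:2379
            exp_annotations:
              message: 'etcd cluster "etcd": member 10.0.0.1:2379 has no leader.'

Such a test file would be run with promtool test rules <file>.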
View file
@ -49,7 +49,8 @@ groups:
severity: critical severity: critical
- alert: GlusterBrickUtilization - alert: GlusterBrickUtilization
annotations: annotations:
message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more than 80% message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more
than 80%
expr: | expr: |
100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"} 100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"}
/ gluster_brick_capacity_bytes_total{job="glusterd2-client"} > 80 / gluster_brick_capacity_bytes_total{job="glusterd2-client"} > 80
@ -58,7 +59,8 @@ groups:
severity: warning severity: warning
- alert: GlusterBrickUtilization - alert: GlusterBrickUtilization
annotations: annotations:
message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more than 90% message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more
than 90%
expr: | expr: |
100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"} 100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"}
/ gluster_brick_capacity_bytes_total{job="glusterd2-client"} > 90 / gluster_brick_capacity_bytes_total{job="glusterd2-client"} > 90
@ -69,7 +71,8 @@ groups:
rules: rules:
- alert: GlusterThinpoolDataUtilization - alert: GlusterThinpoolDataUtilization
annotations: annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than 80% message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more
than 80%
expr: | expr: |
gluster_thinpool_data_used_bytes{job="glusterd2-client"} / gluster_thinpool_data_total_bytes{job="glusterd2-client"} > 0.8 gluster_thinpool_data_used_bytes{job="glusterd2-client"} / gluster_thinpool_data_total_bytes{job="glusterd2-client"} > 0.8
for: 5m for: 5m
@ -77,7 +80,8 @@ groups:
severity: warning severity: warning
- alert: GlusterThinpoolDataUtilization - alert: GlusterThinpoolDataUtilization
annotations: annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than 90% message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more
than 90%
expr: | expr: |
gluster_thinpool_data_used_bytes{job="glusterd2-client"} / gluster_thinpool_data_total_bytes{job="glusterd2-client"} > 0.9 gluster_thinpool_data_used_bytes{job="glusterd2-client"} / gluster_thinpool_data_total_bytes{job="glusterd2-client"} > 0.9
for: 5m for: 5m
@ -85,7 +89,8 @@ groups:
severity: critical severity: critical
- alert: GlusterThinpoolMetadataUtilization - alert: GlusterThinpoolMetadataUtilization
annotations: annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more than 80% message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more
than 80%
expr: | expr: |
gluster_thinpool_metadata_used_bytes{job="glusterd2-client"} / gluster_thinpool_metadata_total_bytes{job="glusterd2-client"} > 0.8 gluster_thinpool_metadata_used_bytes{job="glusterd2-client"} / gluster_thinpool_metadata_total_bytes{job="glusterd2-client"} > 0.8
for: 5m for: 5m
@ -93,7 +98,8 @@ groups:
severity: warning severity: warning
- alert: GlusterThinpoolMetadataUtilization - alert: GlusterThinpoolMetadataUtilization
annotations: annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more than 90% message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more
than 90%
expr: | expr: |
gluster_thinpool_metadata_used_bytes{job="glusterd2-client"} / gluster_thinpool_metadata_total_bytes{job="glusterd2-client"} > 0.9 gluster_thinpool_metadata_used_bytes{job="glusterd2-client"} / gluster_thinpool_metadata_total_bytes{job="glusterd2-client"} > 0.9
for: 5m for: 5m
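Each pair of Gluster alerts above repeats the same utilization expression and differs only in the 80/90 threshold. If the percentage were needed elsewhere (dashboards, additional alerts), it could be precomputed once; the rule below is a hypothetical helper using the same metrics as the alerts, with an invented record name, and is not part of the mixin:

groups:
  - name: gluster-utilization-records
    rules:
      # brick utilization as a percentage, mirroring the GlusterBrickUtilization expr
      - record: gluster_brick:capacity_used:percent
        expr: |
          100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"}
            / gluster_brick_capacity_bytes_total{job="glusterd2-client"}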
View file
@ -13,7 +13,9 @@ groups:
annotations: annotations:
message: | message: |
{{ $labels.job }} {{ $labels.instance }} is experiencing {{ printf "%.2f" $value }}% HTTP errors. {{ $labels.job }} {{ $labels.instance }} is experiencing {{ printf "%.2f" $value }}% HTTP errors.
expr: 100 * sum(rate(jaeger_agent_http_server_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_agent_http_server_total[1m])) by (instance, job, namespace)> 1 expr: 100 * sum(rate(jaeger_agent_http_server_errors_total[1m])) by (instance,
job, namespace) / sum(rate(jaeger_agent_http_server_total[1m])) by (instance,
job, namespace)> 1
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -21,7 +23,9 @@ groups:
annotations: annotations:
message: | message: |
service {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans. service {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
expr: 100 * sum(rate(jaeger_reporter_spans{result=~"dropped|err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_reporter_spans[1m])) by (instance, job, namespace)> 1 expr: 100 * sum(rate(jaeger_reporter_spans{result=~"dropped|err"}[1m])) by (instance,
job, namespace) / sum(rate(jaeger_reporter_spans[1m])) by (instance, job, namespace)>
1
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -29,7 +33,9 @@ groups:
annotations: annotations:
message: | message: |
agent {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans. agent {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
expr: 100 * sum(rate(jaeger_agent_reporter_batches_failures_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_agent_reporter_batches_submitted_total[1m])) by (instance, job, namespace)> 1 expr: 100 * sum(rate(jaeger_agent_reporter_batches_failures_total[1m])) by (instance,
job, namespace) / sum(rate(jaeger_agent_reporter_batches_submitted_total[1m]))
by (instance, job, namespace)> 1
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -45,7 +51,9 @@ groups:
annotations: annotations:
message: | message: |
collector {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans. collector {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
expr: 100 * sum(rate(jaeger_collector_spans_dropped_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_collector_spans_received_total[1m])) by (instance, job, namespace)> 1 expr: 100 * sum(rate(jaeger_collector_spans_dropped_total[1m])) by (instance,
job, namespace) / sum(rate(jaeger_collector_spans_received_total[1m])) by (instance,
job, namespace)> 1
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -53,7 +61,9 @@ groups:
annotations: annotations:
message: | message: |
{{ $labels.job }} {{ $labels.instance }} is failing {{ printf "%.2f" $value }}% in updating sampling policies. {{ $labels.job }} {{ $labels.instance }} is failing {{ printf "%.2f" $value }}% in updating sampling policies.
expr: 100 * sum(rate(jaeger_sampler_queries{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_sampler_queries[1m])) by (instance, job, namespace)> 1 expr: 100 * sum(rate(jaeger_sampler_queries{result="err"}[1m])) by (instance,
job, namespace) / sum(rate(jaeger_sampler_queries[1m])) by (instance, job, namespace)>
1
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -61,7 +71,8 @@ groups:
annotations: annotations:
message: | message: |
{{ $labels.job }} {{ $labels.instance }} is slow at persisting spans. {{ $labels.job }} {{ $labels.instance }} is slow at persisting spans.
expr: histogram_quantile(0.99, sum by (le) (rate(jaeger_collector_save_latency_bucket[1m]))) > 0.5 expr: histogram_quantile(0.99, sum by (le) (rate(jaeger_collector_save_latency_bucket[1m])))
> 0.5
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -69,7 +80,9 @@ groups:
annotations: annotations:
message: | message: |
{{ $labels.job }} {{ $labels.instance }} is failing {{ printf "%.2f" $value }}% in updating throttling policies. {{ $labels.job }} {{ $labels.instance }} is failing {{ printf "%.2f" $value }}% in updating throttling policies.
expr: 100 * sum(rate(jaeger_throttler_updates{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_throttler_updates[1m])) by (instance, job, namespace)> 1 expr: 100 * sum(rate(jaeger_throttler_updates{result="err"}[1m])) by (instance,
job, namespace) / sum(rate(jaeger_throttler_updates[1m])) by (instance, job,
namespace)> 1
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -77,7 +90,9 @@ groups:
annotations: annotations:
message: | message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}. {{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
expr: 100 * sum(rate(jaeger_query_requests_total{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_query_requests_total[1m])) by (instance, job, namespace)> 1 expr: 100 * sum(rate(jaeger_query_requests_total{result="err"}[1m])) by (instance,
job, namespace) / sum(rate(jaeger_query_requests_total[1m])) by (instance, job,
namespace)> 1
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -85,7 +100,9 @@ groups:
annotations: annotations:
message: | message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}. {{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
expr: 100 * sum(rate(jaeger_cassandra_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_cassandra_attempts_total[1m])) by (instance, job, namespace)> 1 expr: 100 * sum(rate(jaeger_cassandra_errors_total[1m])) by (instance, job, namespace)
/ sum(rate(jaeger_cassandra_attempts_total[1m])) by (instance, job, namespace)>
1
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@ -93,7 +110,9 @@ groups:
annotations: annotations:
message: | message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}. {{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
expr: 100 * sum(rate(jaeger_cassandra_read_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_cassandra_read_attempts_total[1m])) by (instance, job, namespace)> 1 expr: 100 * sum(rate(jaeger_cassandra_read_errors_total[1m])) by (instance, job,
namespace) / sum(rate(jaeger_cassandra_read_attempts_total[1m])) by (instance,
job, namespace)> 1
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
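Every Jaeger alert above shares one shape: an error (or drop) rate divided by the matching total rate over 1m, scaled to a percentage and compared against 1. As a hedged illustration of that shape, the first ratio could also be kept as a recording rule; the record name is invented here and no such rule exists in the mixin:

groups:
  - name: jaeger-example-records
    rules:
      # HTTP server error percentage per instance/job/namespace, the same
      # expression as the first alert above minus the "> 1" threshold
      - record: instance_job_namespace:jaeger_agent_http_server_errors:percent_rate1m
        expr: |
          100 * sum(rate(jaeger_agent_http_server_errors_total[1m])) by (instance, job, namespace)
            / sum(rate(jaeger_agent_http_server_total[1m])) by (instance, job, namespace)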
View file
@ -3,7 +3,8 @@ groups:
rules: rules:
- alert: CockroachInstanceFlapping - alert: CockroachInstanceFlapping
annotations: annotations:
message: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted {{ $value }} time(s) in 10m' message: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted
{{ $value }} time(s) in 10m'
expr: | expr: |
resets(cockroachdb_sys_uptime{job="cockroachdb-public"}[10m]) > 5 resets(cockroachdb_sys_uptime{job="cockroachdb-public"}[10m]) > 5
for: 1m for: 1m
@ -29,7 +30,8 @@ groups:
severity: warning severity: warning
- alert: CockroachStoreDiskLow - alert: CockroachStoreDiskLow
annotations: annotations:
message: Store {{ $labels.store }} on node {{ $labels.instance }} at {{ $value }} available disk fraction message: Store {{ $labels.store }} on node {{ $labels.instance }} at {{ $value
}} available disk fraction
expr: | expr: |
:cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.15 :cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.15
for: 30m for: 30m
@ -61,7 +63,8 @@ groups:
severity: warning severity: warning
- alert: CockroachHighOpenFDCount - alert: CockroachHighOpenFDCount
annotations: annotations:
message: 'Too many open file descriptors on {{ $labels.instance }}: {{ $value }} fraction used' message: 'Too many open file descriptors on {{ $labels.instance }}: {{ $value
}} fraction used'
expr: | expr: |
cockroachdb_sys_fd_open{job="cockroachdb-public"} / cockroachdb_sys_fd_softlimit{job="cockroachdb-public"} > 0.8 cockroachdb_sys_fd_open{job="cockroachdb-public"} / cockroachdb_sys_fd_softlimit{job="cockroachdb-public"} > 0.8
for: 10m for: 10m
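CockroachHighOpenFDCount above compares the open to soft-limit file descriptor ratio against 0.8 inline, while CockroachStoreDiskLow consumes an already recorded :cockroachdb_capacity_available:ratio series. Purely as an illustration of the same split applied to the FD check (the record name is invented and the mixin does not define this rule):

groups:
  - name: cockroachdb-example-records
    rules:
      # fraction of the soft FD limit currently in use, as in CockroachHighOpenFDCount
      - record: instance:cockroachdb_sys_fd_open:softlimit_ratio
        expr: |
          cockroachdb_sys_fd_open{job="cockroachdb-public"}
            / cockroachdb_sys_fd_softlimit{job="cockroachdb-public"}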
View file
@ -3,7 +3,10 @@ groups:
rules: rules:
- alert: KubeStateMetricsListErrors - alert: KubeStateMetricsListErrors
annotations: annotations:
message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. description: kube-state-metrics is experiencing errors at an elevated rate in
list operations. This is likely causing it to not be able to expose metrics
about Kubernetes objects correctly or at all.
summary: kube-state-metrics is experiencing errors in list operations.
expr: | expr: |
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
/ /
@ -14,7 +17,10 @@ groups:
severity: critical severity: critical
- alert: KubeStateMetricsWatchErrors - alert: KubeStateMetricsWatchErrors
annotations: annotations:
message: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. description: kube-state-metrics is experiencing errors at an elevated rate in
watch operations. This is likely causing it to not be able to expose metrics
about Kubernetes objects correctly or at all.
summary: kube-state-metrics is experiencing errors in watch operations.
expr: | expr: |
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
/ /
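The kube-state-metrics alerts above replace the single message annotation with a summary and description pair. One common consumer of that pair is the notification template; the configuration below is a hypothetical Alertmanager sketch (receiver name, channel and webhook URL are placeholders) and is not configured anywhere in this repository:

route:
  receiver: slack-notifications
receivers:
  - name: slack-notifications
    slack_configs:
      - api_url: https://hooks.slack.com/services/REPLACE/ME   # placeholder webhook
        channel: '#alerts'
        # short line from the new summary, detail from the new description
        title: '{{ .CommonAnnotations.summary }}'
        text: '{{ .CommonAnnotations.description }}'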
View file
@ -3,7 +3,8 @@ groups:
rules: rules:
- alert: KubePodCrashLooping - alert: KubePodCrashLooping
annotations: annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes. message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
expr: | expr: |
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0 rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
@ -12,7 +13,8 @@ groups:
severity: warning severity: warning
- alert: KubePodNotReady - alert: KubePodNotReady
annotations: annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes. message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
state for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
expr: | expr: |
sum by (namespace, pod) ( sum by (namespace, pod) (
@ -27,7 +29,9 @@ groups:
severity: warning severity: warning
- alert: KubeDeploymentGenerationMismatch - alert: KubeDeploymentGenerationMismatch
annotations: annotations:
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back. message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
}} does not match, this indicates that the Deployment has failed but has not
been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
expr: | expr: |
kube_deployment_status_observed_generation{job="kube-state-metrics"} kube_deployment_status_observed_generation{job="kube-state-metrics"}
@ -38,7 +42,8 @@ groups:
severity: warning severity: warning
- alert: KubeDeploymentReplicasMismatch - alert: KubeDeploymentReplicasMismatch
annotations: annotations:
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes. message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not
matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
expr: | expr: |
( (
@ -55,7 +60,8 @@ groups:
severity: warning severity: warning
- alert: KubeStatefulSetReplicasMismatch - alert: KubeStatefulSetReplicasMismatch
annotations: annotations:
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes. message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not
matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
expr: | expr: |
( (
@ -72,7 +78,9 @@ groups:
severity: warning severity: warning
- alert: KubeStatefulSetGenerationMismatch - alert: KubeStatefulSetGenerationMismatch
annotations: annotations:
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back. message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
}} does not match, this indicates that the StatefulSet has failed but has
not been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
expr: | expr: |
kube_statefulset_status_observed_generation{job="kube-state-metrics"} kube_statefulset_status_observed_generation{job="kube-state-metrics"}
@ -83,7 +91,8 @@ groups:
severity: warning severity: warning
- alert: KubeStatefulSetUpdateNotRolledOut - alert: KubeStatefulSetUpdateNotRolledOut
annotations: annotations:
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out. message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
has not been rolled out.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
expr: | expr: |
( (
@ -108,7 +117,8 @@ groups:
severity: warning severity: warning
- alert: KubeDaemonSetRolloutStuck - alert: KubeDaemonSetRolloutStuck
annotations: annotations:
message: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes. message: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished
or progressed for at least 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
expr: | expr: |
( (
@ -139,7 +149,8 @@ groups:
severity: warning severity: warning
- alert: KubeContainerWaiting - alert: KubeContainerWaiting
annotations: annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour. message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
has been in waiting state for longer than 1 hour.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
expr: | expr: |
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0 sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
@ -148,7 +159,8 @@ groups:
severity: warning severity: warning
- alert: KubeDaemonSetNotScheduled - alert: KubeDaemonSetNotScheduled
annotations: annotations:
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.' message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are not scheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
expr: | expr: |
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
@ -159,7 +171,8 @@ groups:
severity: warning severity: warning
- alert: KubeDaemonSetMisScheduled - alert: KubeDaemonSetMisScheduled
annotations: annotations:
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.' message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are running where they are not supposed to run.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
expr: | expr: |
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0 kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
@ -168,7 +181,8 @@ groups:
severity: warning severity: warning
- alert: KubeJobCompletion - alert: KubeJobCompletion
annotations: annotations:
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than 12 hours to complete. message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than
12 hours to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
expr: | expr: |
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0 kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
@ -186,7 +200,8 @@ groups:
severity: warning severity: warning
- alert: KubeHpaReplicasMismatch - alert: KubeHpaReplicasMismatch
annotations: annotations:
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes. message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired
number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
expr: | expr: |
(kube_hpa_status_desired_replicas{job="kube-state-metrics"} (kube_hpa_status_desired_replicas{job="kube-state-metrics"}
@ -199,7 +214,8 @@ groups:
severity: warning severity: warning
- alert: KubeHpaMaxedOut - alert: KubeHpaMaxedOut
annotations: annotations:
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes. message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max
replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
expr: | expr: |
kube_hpa_status_current_replicas{job="kube-state-metrics"} kube_hpa_status_current_replicas{job="kube-state-metrics"}
@ -212,7 +228,8 @@ groups:
rules: rules:
- alert: KubeCPUOvercommit - alert: KubeCPUOvercommit
annotations: annotations:
message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure. message: Cluster has overcommitted CPU resource requests for Pods and cannot
tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
expr: | expr: |
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{}) sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
@ -225,7 +242,8 @@ groups:
severity: warning severity: warning
- alert: KubeMemoryOvercommit - alert: KubeMemoryOvercommit
annotations: annotations:
message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure. message: Cluster has overcommitted memory resource requests for Pods and cannot
tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
expr: | expr: |
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{}) sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
@ -264,7 +282,8 @@ groups:
severity: warning severity: warning
- alert: KubeQuotaFullyUsed - alert: KubeQuotaFullyUsed
annotations: annotations:
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota. message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused
expr: | expr: |
kube_resourcequota{job="kube-state-metrics", type="used"} kube_resourcequota{job="kube-state-metrics", type="used"}
@ -276,7 +295,9 @@ groups:
severity: info severity: info
- alert: CPUThrottlingHigh - alert: CPUThrottlingHigh
annotations: annotations:
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.' message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{
$labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod
}}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
expr: | expr: |
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace) sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
@ -290,7 +311,9 @@ groups:
rules: rules:
- alert: KubePersistentVolumeFillingUp - alert: KubePersistentVolumeFillingUp
annotations: annotations:
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free. message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }}
in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
}} free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
expr: | expr: |
kubelet_volume_stats_available_bytes{job="kubelet"} kubelet_volume_stats_available_bytes{job="kubelet"}
@ -302,7 +325,9 @@ groups:
severity: critical severity: critical
- alert: KubePersistentVolumeFillingUp - alert: KubePersistentVolumeFillingUp
annotations: annotations:
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available. message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is expected to fill up within four
days. Currently {{ $value | humanizePercentage }} is available.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
expr: | expr: |
( (
@ -317,7 +342,8 @@ groups:
severity: warning severity: warning
- alert: KubePersistentVolumeErrors - alert: KubePersistentVolumeErrors
annotations: annotations:
message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}. message: The persistent volume {{ $labels.persistentvolume }} has status {{
$labels.phase }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
expr: | expr: |
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
@ -328,7 +354,8 @@ groups:
rules: rules:
- alert: KubeVersionMismatch - alert: KubeVersionMismatch
annotations: annotations:
message: There are {{ $value }} different semantic versions of Kubernetes components running. message: There are {{ $value }} different semantic versions of Kubernetes components
running.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
expr: | expr: |
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*).*"))) > 1 count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*).*"))) > 1
@ -337,7 +364,8 @@ groups:
severity: warning severity: warning
- alert: KubeClientErrors - alert: KubeClientErrors
annotations: annotations:
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors. message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
}}' is experiencing {{ $value | humanizePercentage }} errors.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
expr: | expr: |
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job) (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
@ -405,7 +433,8 @@ groups:
rules: rules:
- alert: KubeClientCertificateExpiration - alert: KubeClientCertificateExpiration
annotations: annotations:
message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days. message: A client certificate used to authenticate to the apiserver is expiring
in less than 7.0 days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: | expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800
@ -413,7 +442,8 @@ groups:
severity: warning severity: warning
- alert: KubeClientCertificateExpiration - alert: KubeClientCertificateExpiration
annotations: annotations:
message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours. message: A client certificate used to authenticate to the apiserver is expiring
in less than 24.0 hours.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: | expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400
@ -421,7 +451,9 @@ groups:
severity: critical severity: critical
- alert: AggregatedAPIErrors - alert: AggregatedAPIErrors
annotations: annotations:
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors has increased for it in the past five minutes. High values indicate that the availability of the service changes too often. message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported
errors. The number of errors has increased for it in the past five minutes.
High values indicate that the availability of the service changes too often.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
expr: | expr: |
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2 sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
@ -429,7 +461,8 @@ groups:
severity: warning severity: warning
- alert: AggregatedAPIDown - alert: AggregatedAPIDown
annotations: annotations:
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 5m. message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been
only {{ $value | humanize }}% available over the last 5m.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
expr: | expr: |
(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[5m]))) * 100 < 90 (1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[5m]))) * 100 < 90
@ -466,7 +499,8 @@ groups:
severity: warning severity: warning
- alert: KubeletTooManyPods - alert: KubeletTooManyPods
annotations: annotations:
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity. message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
}} of its Pod capacity.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
expr: | expr: |
count by(node) ( count by(node) (
@ -481,7 +515,8 @@ groups:
severity: warning severity: warning
- alert: KubeNodeReadinessFlapping - alert: KubeNodeReadinessFlapping
annotations: annotations:
message: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes. message: The readiness status of node {{ $labels.node }} has changed {{ $value
}} times in the last 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
expr: | expr: |
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2 sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
@ -490,7 +525,8 @@ groups:
severity: warning severity: warning
- alert: KubeletPlegDurationHigh - alert: KubeletPlegDurationHigh
annotations: annotations:
message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}. message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration
of {{ $value }} seconds on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
expr: | expr: |
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
@ -499,7 +535,8 @@ groups:
severity: warning severity: warning
- alert: KubeletPodStartUpLatencyHigh - alert: KubeletPodStartUpLatencyHigh
annotations: annotations:
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}. message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
expr: | expr: |
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet"} > 60 histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet"} > 60
@ -3,7 +3,8 @@ groups:
rules: rules:
- alert: NodeFilesystemSpaceFillingUp - alert: NodeFilesystemSpaceFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up. description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available space left and is filling up.
summary: Filesystem is predicted to run out of space within the next 24 hours. summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: | expr: |
( (
@ -18,7 +19,8 @@ groups:
severity: warning severity: warning
- alert: NodeFilesystemSpaceFillingUp - alert: NodeFilesystemSpaceFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available space left and is filling up fast.
summary: Filesystem is predicted to run out of space within the next 4 hours. summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: | expr: |
( (
@ -33,7 +35,8 @@ groups:
severity: critical severity: critical
- alert: NodeFilesystemAlmostOutOfSpace - alert: NodeFilesystemAlmostOutOfSpace
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available space left.
summary: Filesystem has less than 5% space left. summary: Filesystem has less than 5% space left.
expr: | expr: |
( (
@ -46,7 +49,8 @@ groups:
severity: warning severity: warning
- alert: NodeFilesystemAlmostOutOfSpace - alert: NodeFilesystemAlmostOutOfSpace
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left. description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available space left.
summary: Filesystem has less than 3% space left. summary: Filesystem has less than 3% space left.
expr: | expr: |
( (
@ -59,7 +63,8 @@ groups:
severity: critical severity: critical
- alert: NodeFilesystemFilesFillingUp - alert: NodeFilesystemFilesFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available inodes left and is filling up.
summary: Filesystem is predicted to run out of inodes within the next 24 hours. summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: | expr: |
( (
@ -74,7 +79,8 @@ groups:
severity: warning severity: warning
- alert: NodeFilesystemFilesFillingUp - alert: NodeFilesystemFilesFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
summary: Filesystem is predicted to run out of inodes within the next 4 hours. summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: | expr: |
( (
@ -89,7 +95,8 @@ groups:
severity: critical severity: critical
- alert: NodeFilesystemAlmostOutOfFiles - alert: NodeFilesystemAlmostOutOfFiles
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available inodes left.
summary: Filesystem has less than 5% inodes left. summary: Filesystem has less than 5% inodes left.
expr: | expr: |
( (
@ -102,7 +109,8 @@ groups:
severity: warning severity: warning
- alert: NodeFilesystemAlmostOutOfFiles - alert: NodeFilesystemAlmostOutOfFiles
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left. description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available inodes left.
summary: Filesystem has less than 3% inodes left. summary: Filesystem has less than 3% inodes left.
expr: | expr: |
( (
@ -115,7 +123,8 @@ groups:
severity: critical severity: critical
- alert: NodeNetworkReceiveErrs - alert: NodeNetworkReceiveErrs
annotations: annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.' description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
summary: Network interface is reporting many receive errors. summary: Network interface is reporting many receive errors.
expr: | expr: |
increase(node_network_receive_errs_total[2m]) > 10 increase(node_network_receive_errs_total[2m]) > 10
@ -124,7 +133,8 @@ groups:
severity: warning severity: warning
- alert: NodeNetworkTransmitErrs - alert: NodeNetworkTransmitErrs
annotations: annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.' description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
summary: Network interface is reporting many transmit errors. summary: Network interface is reporting many transmit errors.
expr: | expr: |
increase(node_network_transmit_errs_total[2m]) > 10 increase(node_network_transmit_errs_total[2m]) > 10
@ -149,7 +159,8 @@ groups:
severity: warning severity: warning
- alert: NodeClockSkewDetected - alert: NodeClockSkewDetected
annotations: annotations:
message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host. message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure
NTP is configured correctly on this host.
summary: Clock skew detected. summary: Clock skew detected.
expr: | expr: |
( (
@ -168,7 +179,8 @@ groups:
severity: warning severity: warning
- alert: NodeClockNotSynchronising - alert: NodeClockNotSynchronising
annotations: annotations:
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host. message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is
configured on this host.
summary: Clock not synchronising. summary: Clock not synchronising.
expr: | expr: |
min_over_time(node_timex_sync_status[5m]) == 0 min_over_time(node_timex_sync_status[5m]) == 0
@ -14,8 +14,10 @@ groups:
severity: critical severity: critical
- alert: PrometheusNotificationQueueRunningFull - alert: PrometheusNotificationQueueRunningFull
annotations: annotations:
description: Alert notification queue of Prometheus {{$labels.instance}} is running full. description: Alert notification queue of Prometheus {{$labels.instance}} is
summary: Prometheus alert notification queue predicted to run full in less than 30m. running full.
summary: Prometheus alert notification queue predicted to run full in less than
30m.
expr: | expr: |
# Without min_over_time, failed scrapes could create false negatives, see # Without min_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@ -29,8 +31,10 @@ groups:
severity: warning severity: warning
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations: annotations:
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.' description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. {{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.'
summary: Prometheus has encountered more than 1% errors sending alerts to a
specific Alertmanager.
expr: | expr: |
( (
rate(prometheus_notifications_errors_total{job="prometheus"}[5m]) rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
@ -44,7 +48,8 @@ groups:
severity: warning severity: warning
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations: annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.instance}} to any Alertmanager.' description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
from Prometheus {{$labels.instance}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: | expr: |
min without(alertmanager) ( min without(alertmanager) (
@ -70,7 +75,8 @@ groups:
severity: warning severity: warning
- alert: PrometheusTSDBReloadsFailing - alert: PrometheusTSDBReloadsFailing
annotations: annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}} reload failures over the last 3h. description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
reload failures over the last 3h.
summary: Prometheus has issues reloading blocks from disk. summary: Prometheus has issues reloading blocks from disk.
expr: | expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus"}[3h]) > 0 increase(prometheus_tsdb_reloads_failures_total{job="prometheus"}[3h]) > 0
@ -79,7 +85,8 @@ groups:
severity: warning severity: warning
- alert: PrometheusTSDBCompactionsFailing - alert: PrometheusTSDBCompactionsFailing
annotations: annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}} compaction failures over the last 3h. description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
compaction failures over the last 3h.
summary: Prometheus has issues compacting blocks. summary: Prometheus has issues compacting blocks.
expr: | expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus"}[3h]) > 0 increase(prometheus_tsdb_compactions_failed_total{job="prometheus"}[3h]) > 0
@ -97,7 +104,8 @@ groups:
severity: warning severity: warning
- alert: PrometheusDuplicateTimestamps - alert: PrometheusDuplicateTimestamps
annotations: annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp. description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }}
samples/s with different values but duplicated timestamp.
summary: Prometheus is dropping samples with duplicate timestamps. summary: Prometheus is dropping samples with duplicate timestamps.
expr: | expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus"}[5m]) > 0 rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus"}[5m]) > 0
@ -106,7 +114,8 @@ groups:
severity: warning severity: warning
- alert: PrometheusOutOfOrderTimestamps - alert: PrometheusOutOfOrderTimestamps
annotations: annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order. description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }}
samples/s with timestamps arriving out of order.
summary: Prometheus drops samples with out-of-order timestamps. summary: Prometheus drops samples with out-of-order timestamps.
expr: | expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus"}[5m]) > 0 rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus"}[5m]) > 0
@ -115,7 +124,8 @@ groups:
severity: warning severity: warning
- alert: PrometheusRemoteStorageFailures - alert: PrometheusRemoteStorageFailures
annotations: annotations:
description: Prometheus {{$labels.instance}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }} description: Prometheus {{$labels.instance}} failed to send {{ printf "%.1f"
$value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
summary: Prometheus fails to send samples to remote storage. summary: Prometheus fails to send samples to remote storage.
expr: | expr: |
( (
@ -134,7 +144,8 @@ groups:
severity: critical severity: critical
- alert: PrometheusRemoteWriteBehind - alert: PrometheusRemoteWriteBehind
annotations: annotations:
description: Prometheus {{$labels.instance}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}. description: Prometheus {{$labels.instance}} remote write is {{ printf "%.1f"
$value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
summary: Prometheus remote write is behind. summary: Prometheus remote write is behind.
expr: | expr: |
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
@ -150,8 +161,12 @@ groups:
severity: critical severity: critical
- alert: PrometheusRemoteWriteDesiredShards - alert: PrometheusRemoteWriteDesiredShards
annotations: annotations:
description: Prometheus {{$labels.instance}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus"}` $labels.instance | query | first | value }}. description: Prometheus {{$labels.instance}} remote write desired shards calculation
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url
}}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus"}`
$labels.instance | query | first | value }}.
summary: Prometheus remote write desired shards calculation wants to run more
than configured max shards.
expr: | expr: |
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@ -165,7 +180,8 @@ groups:
severity: warning severity: warning
- alert: PrometheusRuleFailures - alert: PrometheusRuleFailures
annotations: annotations:
description: Prometheus {{$labels.instance}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m. description: Prometheus {{$labels.instance}} has failed to evaluate {{ printf
"%.0f" $value }} rules in the last 5m.
summary: Prometheus is failing rule evaluations. summary: Prometheus is failing rule evaluations.
expr: | expr: |
increase(prometheus_rule_evaluation_failures_total{job="prometheus"}[5m]) > 0 increase(prometheus_rule_evaluation_failures_total{job="prometheus"}[5m]) > 0
@ -174,7 +190,8 @@ groups:
severity: critical severity: critical
- alert: PrometheusMissingRuleEvaluations - alert: PrometheusMissingRuleEvaluations
annotations: annotations:
description: Prometheus {{$labels.instance}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m. description: Prometheus {{$labels.instance}} has missed {{ printf "%.0f" $value
}} rule group evaluations in the last 5m.
summary: Prometheus is missing rule evaluations due to slow rule group evaluation. summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: | expr: |
increase(prometheus_rule_group_iterations_missed_total{job="prometheus"}[5m]) > 0 increase(prometheus_rule_group_iterations_missed_total{job="prometheus"}[5m]) > 0
@ -183,8 +200,10 @@ groups:
severity: warning severity: warning
- alert: PrometheusTargetLimitHit - alert: PrometheusTargetLimitHit
annotations: annotations:
description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit. description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value
summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. }} targets because the number of targets exceeded the configured target_limit.
summary: Prometheus has dropped targets because some scrape configs have exceeded
the targets limit.
expr: | expr: |
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus"}[5m]) > 0 increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus"}[5m]) > 0
for: 15m for: 15m
@ -3,7 +3,8 @@ groups:
rules: rules:
- alert: ThanosCompactMultipleRunning - alert: ThanosCompactMultipleRunning
annotations: annotations:
message: No more than one Thanos Compact instance should be running at once. There are {{ $value }} message: No more than one Thanos Compact instance should be running at once.
There are {{ $value }}
expr: sum(up{job=~"thanos-compact.*"}) > 1 expr: sum(up{job=~"thanos-compact.*"}) > 1
for: 5m for: 5m
labels: labels:
@ -17,7 +18,8 @@ groups:
severity: warning severity: warning
- alert: ThanosCompactHighCompactionFailures - alert: ThanosCompactHighCompactionFailures
annotations: annotations:
message: Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize }}% of compactions. message: Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize
}}% of compactions.
expr: | expr: |
( (
sum by (job) (rate(thanos_compact_group_compactions_failures_total{job=~"thanos-compact.*"}[5m])) sum by (job) (rate(thanos_compact_group_compactions_failures_total{job=~"thanos-compact.*"}[5m]))
@ -30,7 +32,8 @@ groups:
severity: warning severity: warning
- alert: ThanosCompactBucketHighOperationFailures - alert: ThanosCompactBucketHighOperationFailures
annotations: annotations:
message: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations. message: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value
| humanize }}% of operations.
expr: | expr: |
( (
sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-compact.*"}[5m])) sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-compact.*"}[5m]))
@ -44,14 +47,16 @@ groups:
- alert: ThanosCompactHasNotRun - alert: ThanosCompactHasNotRun
annotations: annotations:
message: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours. message: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours.
expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) / 60 / 60 > 24 expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h])))
/ 60 / 60 > 24
labels: labels:
severity: warning severity: warning
- name: thanos-query.rules - name: thanos-query.rules
rules: rules:
- alert: ThanosQueryHttpRequestQueryErrorRateHigh - alert: ThanosQueryHttpRequestQueryErrorRateHigh
annotations: annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query" requests. message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize
}}% of "query" requests.
expr: | expr: |
( (
sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m])) sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m]))
@ -63,7 +68,8 @@ groups:
severity: critical severity: critical
- alert: ThanosQueryHttpRequestQueryRangeErrorRateHigh - alert: ThanosQueryHttpRequestQueryRangeErrorRateHigh
annotations: annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query_range" requests. message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize
}}% of "query_range" requests.
expr: | expr: |
( (
sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m])) sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m]))
@ -75,7 +81,8 @@ groups:
severity: critical severity: critical
- alert: ThanosQueryGrpcServerErrorRate - alert: ThanosQueryGrpcServerErrorRate
annotations: annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: | expr: |
( (
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*"}[5m])) sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*"}[5m]))
@ -88,7 +95,8 @@ groups:
severity: warning severity: warning
- alert: ThanosQueryGrpcClientErrorRate - alert: ThanosQueryGrpcClientErrorRate
annotations: annotations:
message: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize }}% of requests. message: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize
}}% of requests.
expr: | expr: |
( (
sum by (job) (rate(grpc_client_handled_total{grpc_code!="OK", job=~"thanos-query.*"}[5m])) sum by (job) (rate(grpc_client_handled_total{grpc_code!="OK", job=~"thanos-query.*"}[5m]))
@ -100,7 +108,8 @@ groups:
severity: warning severity: warning
- alert: ThanosQueryHighDNSFailures - alert: ThanosQueryHighDNSFailures
annotations: annotations:
message: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing DNS queries for store endpoints. message: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing
DNS queries for store endpoints.
expr: | expr: |
( (
sum by (job) (rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m])) sum by (job) (rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
@ -112,7 +121,8 @@ groups:
severity: warning severity: warning
- alert: ThanosQueryInstantLatencyHigh - alert: ThanosQueryInstantLatencyHigh
annotations: annotations:
message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for instant queries. message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for instant queries.
expr: | expr: |
( (
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m]))) > 40 histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m]))) > 40
@ -124,7 +134,8 @@ groups:
severity: critical severity: critical
- alert: ThanosQueryRangeLatencyHigh - alert: ThanosQueryRangeLatencyHigh
annotations: annotations:
message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for range queries. message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for range queries.
expr: | expr: |
( (
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query_range"}[5m]))) > 90 histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query_range"}[5m]))) > 90
@ -138,7 +149,8 @@ groups:
rules: rules:
- alert: ThanosReceiveHttpRequestErrorRateHigh - alert: ThanosReceiveHttpRequestErrorRateHigh
annotations: annotations:
message: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. message: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: | expr: |
( (
sum(rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m])) sum(rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m]))
@ -150,7 +162,8 @@ groups:
severity: critical severity: critical
- alert: ThanosReceiveHttpRequestLatencyHigh - alert: ThanosReceiveHttpRequestLatencyHigh
annotations: annotations:
message: Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for requests. message: Thanos Receive {{$labels.job}} has a 99th percentile latency of {{
$value }} seconds for requests.
expr: | expr: |
( (
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-receive.*", handler="receive"}[5m]))) > 10 histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-receive.*", handler="receive"}[5m]))) > 10
@ -162,7 +175,8 @@ groups:
severity: critical severity: critical
- alert: ThanosReceiveHighReplicationFailures - alert: ThanosReceiveHighReplicationFailures
annotations: annotations:
message: Thanos Receive {{$labels.job}} is failing to replicate {{ $value | humanize }}% of requests. message: Thanos Receive {{$labels.job}} is failing to replicate {{ $value |
humanize }}% of requests.
expr: | expr: |
thanos_receive_replication_factor > 1 thanos_receive_replication_factor > 1
and and
@ -184,7 +198,8 @@ groups:
severity: warning severity: warning
- alert: ThanosReceiveHighForwardRequestFailures - alert: ThanosReceiveHighForwardRequestFailures
annotations: annotations:
message: Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize }}% of requests. message: Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize
}}% of requests.
expr: | expr: |
( (
sum by (job) (rate(thanos_receive_forward_requests_total{result="error", job=~"thanos-receive.*"}[5m])) sum by (job) (rate(thanos_receive_forward_requests_total{result="error", job=~"thanos-receive.*"}[5m]))
@ -196,7 +211,8 @@ groups:
severity: warning severity: warning
- alert: ThanosReceiveHighHashringFileRefreshFailures - alert: ThanosReceiveHighHashringFileRefreshFailures
annotations: annotations:
message: Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{ $value | humanize }} of attempts failed. message: Thanos Receive {{$labels.job}} is failing to refresh hashring file,
{{ $value | humanize }} of attempts failed.
expr: | expr: |
( (
sum by (job) (rate(thanos_receive_hashrings_file_errors_total{job=~"thanos-receive.*"}[5m])) sum by (job) (rate(thanos_receive_hashrings_file_errors_total{job=~"thanos-receive.*"}[5m]))
@ -209,14 +225,17 @@ groups:
severity: warning severity: warning
- alert: ThanosReceiveConfigReloadFailure - alert: ThanosReceiveConfigReloadFailure
annotations: annotations:
message: Thanos Receive {{$labels.job}} has not been able to reload hashring configurations. message: Thanos Receive {{$labels.job}} has not been able to reload hashring
expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) by (job) != 1 configurations.
expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"})
by (job) != 1
for: 5m for: 5m
labels: labels:
severity: warning severity: warning
- alert: ThanosReceiveNoUpload - alert: ThanosReceiveNoUpload
annotations: annotations:
message: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded latest data to object storage. message: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded
latest data to object storage.
expr: | expr: |
(up{job=~"thanos-receive.*"} - 1) (up{job=~"thanos-receive.*"} - 1)
+ on (instance) # filters to only alert on current instance last 3h + on (instance) # filters to only alert on current instance last 3h
@ -236,7 +255,8 @@ groups:
severity: critical severity: critical
- alert: ThanosSidecarUnhealthy - alert: ThanosSidecarUnhealthy
annotations: annotations:
message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{ $value }} seconds. message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{
$value }} seconds.
expr: | expr: |
time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"}) by (job, pod) >= 600 time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"}) by (job, pod) >= 600
labels: labels:
@ -245,7 +265,8 @@ groups:
rules: rules:
- alert: ThanosStoreGrpcErrorRate - alert: ThanosStoreGrpcErrorRate
annotations: annotations:
message: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. message: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: | expr: |
( (
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*"}[5m])) sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*"}[5m]))
@ -258,7 +279,8 @@ groups:
severity: warning severity: warning
- alert: ThanosStoreSeriesGateLatencyHigh - alert: ThanosStoreSeriesGateLatencyHigh
annotations: annotations:
message: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for store series gate requests. message: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for store series gate requests.
expr: | expr: |
( (
histogram_quantile(0.9, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2 histogram_quantile(0.9, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2
@ -270,7 +292,8 @@ groups:
severity: warning severity: warning
- alert: ThanosStoreBucketHighOperationFailures - alert: ThanosStoreBucketHighOperationFailures
annotations: annotations:
message: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations. message: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value
| humanize }}% of operations.
expr: | expr: |
( (
sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-store.*"}[5m])) sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-store.*"}[5m]))
@ -283,7 +306,8 @@ groups:
severity: warning severity: warning
- alert: ThanosStoreObjstoreOperationLatencyHigh - alert: ThanosStoreObjstoreOperationLatencyHigh
annotations: annotations:
message: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of {{ $value }} seconds for the bucket operations. message: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of
{{ $value }} seconds for the bucket operations.
expr: | expr: |
( (
histogram_quantile(0.9, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2 histogram_quantile(0.9, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2
@ -305,7 +329,8 @@ groups:
severity: critical severity: critical
- alert: ThanosRuleSenderIsFailingAlerts - alert: ThanosRuleSenderIsFailingAlerts
annotations: annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts to alertmanager. message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts
to alertmanager.
expr: | expr: |
sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0
for: 5m for: 5m
@ -313,7 +338,8 @@ groups:
severity: critical severity: critical
- alert: ThanosRuleHighRuleEvaluationFailures - alert: ThanosRuleHighRuleEvaluationFailures
annotations: annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate rules. message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate
rules.
expr: | expr: |
( (
sum by (job) (rate(prometheus_rule_evaluation_failures_total{job=~"thanos-rule.*"}[5m])) sum by (job) (rate(prometheus_rule_evaluation_failures_total{job=~"thanos-rule.*"}[5m]))
@ -326,7 +352,8 @@ groups:
severity: critical severity: critical
- alert: ThanosRuleHighRuleEvaluationWarnings - alert: ThanosRuleHighRuleEvaluationWarnings
annotations: annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation warnings. message: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation
warnings.
expr: | expr: |
sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0 sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0
for: 15m for: 15m
@ -334,7 +361,8 @@ groups:
severity: info severity: info
- alert: ThanosRuleRuleEvaluationLatencyHigh - alert: ThanosRuleRuleEvaluationLatencyHigh
annotations: annotations:
message: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency than interval for {{$labels.rule_group}}. message: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency
than interval for {{$labels.rule_group}}.
expr: | expr: |
( (
sum by (job, pod, rule_group) (prometheus_rule_group_last_duration_seconds{job=~"thanos-rule.*"}) sum by (job, pod, rule_group) (prometheus_rule_group_last_duration_seconds{job=~"thanos-rule.*"})
@ -346,7 +374,8 @@ groups:
severity: warning severity: warning
- alert: ThanosRuleGrpcErrorRate - alert: ThanosRuleGrpcErrorRate
annotations: annotations:
message: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. message: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: | expr: |
( (
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m])) sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m]))
@ -360,13 +389,15 @@ groups:
- alert: ThanosRuleConfigReloadFailure - alert: ThanosRuleConfigReloadFailure
annotations: annotations:
message: Thanos Rule {{$labels.job}} has not been able to reload its configuration. message: Thanos Rule {{$labels.job}} has not been able to reload its configuration.
expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by (job) != 1 expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by
(job) != 1
for: 5m for: 5m
labels: labels:
severity: info severity: info
- alert: ThanosRuleQueryHighDNSFailures - alert: ThanosRuleQueryHighDNSFailures
annotations: annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for query endpoints. message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing
DNS queries for query endpoints.
expr: | expr: |
( (
sum by (job) (rate(thanos_ruler_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m])) sum by (job) (rate(thanos_ruler_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m]))
@ -379,7 +410,8 @@ groups:
severity: warning severity: warning
- alert: ThanosRuleAlertmanagerHighDNSFailures - alert: ThanosRuleAlertmanagerHighDNSFailures
annotations: annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for Alertmanager endpoints. message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing
DNS queries for Alertmanager endpoints.
expr: | expr: |
( (
sum by (job) (rate(thanos_ruler_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m])) sum by (job) (rate(thanos_ruler_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m]))
@ -392,7 +424,8 @@ groups:
severity: warning severity: warning
- alert: ThanosRuleNoEvaluationFor10Intervals - alert: ThanosRuleNoEvaluationFor10Intervals
annotations: annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups that did not evaluate for at least 10x of their expected interval. message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups
that did not evaluate for at least 10x of their expected interval.
expr: | expr: |
time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"}) time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"})
> >
@ -402,7 +435,8 @@ groups:
severity: info severity: info
- alert: ThanosNoRuleEvaluations - alert: ThanosNoRuleEvaluations
annotations: annotations:
message: Thanos Rule {{$labels.job}} did not perform any rule evaluations in the past 2 minutes. message: Thanos Rule {{$labels.job}} did not perform any rule evaluations in
the past 2 minutes.
expr: | expr: |
sum(rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[2m])) <= 0 sum(rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[2m])) <= 0
and and
@ -472,7 +506,8 @@ groups:
severity: critical severity: critical
- alert: ThanosBucketReplicateErrorRate - alert: ThanosBucketReplicateErrorRate
annotations: annotations:
message: Thanos Replicate failing to run, {{ $value | humanize }}% of attempts failed. message: Thanos Replicate failing to run, {{ $value | humanize }}% of attempts
failed.
expr: | expr: |
( (
sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m])) sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m]))
@ -484,7 +519,8 @@ groups:
severity: critical severity: critical
- alert: ThanosBucketReplicateRunLatency - alert: ThanosBucketReplicateRunLatency
annotations: annotations:
message: Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for the replicate operations. message: Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{
$value }} seconds for the replicate operations.
expr: | expr: |
( (
histogram_quantile(0.9, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20 histogram_quantile(0.9, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20
@ -59,7 +59,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CephMdsMissingReplicas alert: CephMdsMissingReplicas
annotations: annotations:
description: Minimum required replicas for storage metadata service not available. Might affect the working of storage cluster. description: Minimum required replicas for storage metadata service not available.
Might affect the working of storage cluster.
message: Insufficient replicas for storage metadata service. message: Insufficient replicas for storage metadata service.
severity_level: warning severity_level: warning
storage_type: ceph storage_type: ceph
@ -93,7 +94,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CephMonHighNumberOfLeaderChanges alert: CephMonHighNumberOfLeaderChanges
annotations: annotations:
description: Ceph Monitor {{ $labels.ceph_daemon }} on host {{ $labels.hostname }} has seen {{ $value | printf "%.2f" }} leader changes per minute recently. description: Ceph Monitor {{ $labels.ceph_daemon }} on host {{ $labels.hostname
}} has seen {{ $value | printf "%.2f" }} leader changes per minute recently.
message: Storage Cluster has seen many leader changes recently. message: Storage Cluster has seen many leader changes recently.
severity_level: warning severity_level: warning
storage_type: ceph storage_type: ceph
@ -129,7 +131,9 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CephOSDCriticallyFull alert: CephOSDCriticallyFull
annotations: annotations:
description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has crossed 85% on host {{ $labels.hostname }}. Immediately free up some space or expand the storage cluster or contact support. description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has
crossed 85% on host {{ $labels.hostname }}. Immediately free up some space or
expand the storage cluster or contact support.
message: Back-end storage device is critically full. message: Back-end storage device is critically full.
severity_level: error severity_level: error
storage_type: ceph storage_type: ceph
@ -145,7 +149,9 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CephOSDNearFull alert: CephOSDNearFull
annotations: annotations:
description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has crossed 75% on host {{ $labels.hostname }}. Free up some space or expand the storage cluster or contact support. description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has
crossed 75% on host {{ $labels.hostname }}. Free up some space or expand the storage
cluster or contact support.
message: Back-end storage device is nearing full. message: Back-end storage device is nearing full.
severity_level: warning severity_level: warning
storage_type: ceph storage_type: ceph
@ -161,7 +167,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CephOSDDiskNotResponding alert: CephOSDDiskNotResponding
annotations: annotations:
description: Disk device {{ $labels.device }} not responding, on host {{ $labels.host }}. description: Disk device {{ $labels.device }} not responding, on host {{ $labels.host
}}.
message: Disk not responding message: Disk not responding
severity_level: error severity_level: error
storage_type: ceph storage_type: ceph
@ -177,7 +184,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CephOSDDiskUnavailable alert: CephOSDDiskUnavailable
annotations: annotations:
description: Disk device {{ $labels.device }} not accessible on host {{ $labels.host }}. description: Disk device {{ $labels.device }} not accessible on host {{ $labels.host
}}.
message: Disk not accessible message: Disk not accessible
severity_level: error severity_level: error
storage_type: ceph storage_type: ceph
@ -227,8 +235,10 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PersistentVolumeUsageNearFull alert: PersistentVolumeUsageNearFull
annotations: annotations:
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 75%. Free up some space or expand the PVC. description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 75%.
message: PVC {{ $labels.persistentvolumeclaim }} is nearing full. Data deletion or PVC expansion is required. Free up some space or expand the PVC.
message: PVC {{ $labels.persistentvolumeclaim }} is nearing full. Data deletion
or PVC expansion is required.
severity_level: warning severity_level: warning
storage_type: ceph storage_type: ceph
expr: | expr: |
@ -243,8 +253,10 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PersistentVolumeUsageCritical alert: PersistentVolumeUsageCritical
annotations: annotations:
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 85%. Free up some space or expand the PVC immediately. description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 85%.
message: PVC {{ $labels.persistentvolumeclaim }} is critically full. Data deletion or PVC expansion is required. Free up some space or expand the PVC immediately.
message: PVC {{ $labels.persistentvolumeclaim }} is critically full. Data deletion
or PVC expansion is required.
severity_level: error severity_level: error
storage_type: ceph storage_type: ceph
expr: | expr: |
@ -327,8 +339,10 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CephClusterNearFull alert: CephClusterNearFull
annotations: annotations:
description: Storage cluster utilization has crossed 75% and will become read-only at 85%. Free up some space or expand the storage cluster. description: Storage cluster utilization has crossed 75% and will become read-only
message: Storage cluster is nearing full. Data deletion or cluster expansion is required. at 85%. Free up some space or expand the storage cluster.
message: Storage cluster is nearing full. Data deletion or cluster expansion is
required.
severity_level: warning severity_level: warning
storage_type: ceph storage_type: ceph
expr: | expr: |
@ -343,8 +357,10 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CephClusterCriticallyFull alert: CephClusterCriticallyFull
annotations: annotations:
description: Storage cluster utilization has crossed 80% and will become read-only at 85%. Free up some space or expand the storage cluster immediately. description: Storage cluster utilization has crossed 80% and will become read-only
message: Storage cluster is critically full and needs immediate data deletion or cluster expansion. at 85%. Free up some space or expand the storage cluster immediately.
message: Storage cluster is critically full and needs immediate data deletion or
cluster expansion.
severity_level: error severity_level: error
storage_type: ceph storage_type: ceph
expr: | expr: |
@ -359,8 +375,10 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CephClusterReadOnly alert: CephClusterReadOnly
annotations: annotations:
description: Storage cluster utilization has crossed 85% and will become read-only now. Free up some space or expand the storage cluster immediately. description: Storage cluster utilization has crossed 85% and will become read-only
message: Storage cluster is read-only now and needs immediate data deletion or cluster expansion. now. Free up some space or expand the storage cluster immediately.
message: Storage cluster is read-only now and needs immediate data deletion or cluster
expansion.
severity_level: error severity_level: error
storage_type: ceph storage_type: ceph
expr: | expr: |
@ -39,7 +39,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CoreDNSLatencyHigh alert: CoreDNSLatencyHigh
annotations: annotations:
message: CoreDNS has 99th percentile latency of {{ $value }} seconds for server {{ $labels.server }} zone {{ $labels.zone }} . message: CoreDNS has 99th percentile latency of {{ $value }} seconds for server
{{ $labels.server }} zone {{ $labels.zone }} .
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednslatencyhigh runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednslatencyhigh
expr: | expr: |
histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job="kube-dns"}[5m])) by(server, zone, le)) > 4 histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job="kube-dns"}[5m])) by(server, zone, le)) > 4
@ -54,7 +55,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CoreDNSErrorsHigh alert: CoreDNSErrorsHigh
annotations: annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests. message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of
requests.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
expr: | expr: |
sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m])) sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@ -71,7 +73,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CoreDNSErrorsHigh alert: CoreDNSErrorsHigh
annotations: annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests. message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of
requests.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
expr: | expr: |
sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m])) sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@ -90,7 +93,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CoreDNSForwardLatencyHigh alert: CoreDNSForwardLatencyHigh
annotations: annotations:
message: CoreDNS has 99th percentile latency of {{ $value }} seconds forwarding requests to {{ $labels.to }}. message: CoreDNS has 99th percentile latency of {{ $value }} seconds forwarding
requests to {{ $labels.to }}.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardlatencyhigh runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardlatencyhigh
expr: | expr: |
histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{job="kube-dns"}[5m])) by(to, le)) > 4 histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{job="kube-dns"}[5m])) by(to, le)) > 4
@ -105,7 +109,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CoreDNSForwardErrorsHigh alert: CoreDNSForwardErrorsHigh
annotations: annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of forward requests to {{ $labels.to }}. message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of
forward requests to {{ $labels.to }}.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
expr: | expr: |
sum(rate(coredns_forward_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m])) sum(rate(coredns_forward_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@ -122,7 +127,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: CoreDNSForwardErrorsHigh alert: CoreDNSForwardErrorsHigh
annotations: annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of forward requests to {{ $labels.to }}. message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of
forward requests to {{ $labels.to }}.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
expr: | expr: |
sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m])) sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
File diff suppressed because it is too large
@@ -56,7 +56,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdNoLeader
annotations:
-message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.'
+message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no
+  leader.'
expr: |
etcd_server_has_leader{job=~".*etcd.*"} == 0
for: 1m
@@ -69,7 +70,9 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfLeaderChanges
annotations:
-message: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
+message: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the
+  last 15 minutes. Frequent elections may be a sign of insufficient resources, high
+  network latency, or disruptions by other components and should be investigated.'
expr: |
increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4
for: 5m
@@ -82,7 +85,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfFailedGRPCRequests
annotations:
-message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
+message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method
+  }} failed on etcd instance {{ $labels.instance }}.'
expr: |
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) without (grpc_type, grpc_code)
/
@@ -98,7 +102,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfFailedGRPCRequests
annotations:
-message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
+message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method
+  }} failed on etcd instance {{ $labels.instance }}.'
expr: |
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) without (grpc_type, grpc_code)
/
@@ -114,7 +119,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdGRPCRequestsSlow
annotations:
-message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
+message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method
+  }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) without(grpc_type))
> 0.15
@@ -128,7 +134,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdMemberCommunicationSlow
annotations:
-message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
+message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To
+  }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.15
@@ -142,7 +149,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfFailedProposals
annotations:
-message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.'
+message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within
+  the last 30 minutes on etcd instance {{ $labels.instance }}.'
expr: |
rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
for: 15m
@@ -155,7 +163,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighFsyncDurations
annotations:
-message: 'etcd cluster "{{ $labels.job }}": 99th percentile fync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.'
+message: 'etcd cluster "{{ $labels.job }}": 99th percentile fync durations are {{
+  $value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.5
@@ -169,7 +178,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighCommitDurations
annotations:
-message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.'
+message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{
+  $value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.25
@@ -183,7 +193,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfFailedHTTPRequests
annotations:
-message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}'
+message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance
+  {{ $labels.instance }}'
expr: |
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
without (code) > 0.01
@@ -197,7 +208,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfFailedHTTPRequests
annotations:
-message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.'
+message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance
+  {{ $labels.instance }}.'
expr: |
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
without (code) > 0.05
@@ -211,7 +223,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHTTPRequestsSlow
annotations:
-message: etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow.
+message: etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method
+  }} are slow.
expr: |
histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
> 0.15


@@ -96,7 +96,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterBrickUtilization
annotations:
-message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more than 80%
+message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more
+  than 80%
expr: |
100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"}
/ gluster_brick_capacity_bytes_total{job="glusterd2-client"} > 80
@@ -110,7 +111,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterBrickUtilization
annotations:
-message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more than 90%
+message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more
+  than 90%
expr: |
100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"}
/ gluster_brick_capacity_bytes_total{job="glusterd2-client"} > 90
@@ -126,7 +128,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterThinpoolDataUtilization
annotations:
-message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than 80%
+message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than
+  80%
expr: |
gluster_thinpool_data_used_bytes{job="glusterd2-client"} / gluster_thinpool_data_total_bytes{job="glusterd2-client"} > 0.8
for: 5m
@@ -139,7 +142,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterThinpoolDataUtilization
annotations:
-message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than 90%
+message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than
+  90%
expr: |
gluster_thinpool_data_used_bytes{job="glusterd2-client"} / gluster_thinpool_data_total_bytes{job="glusterd2-client"} > 0.9
for: 5m
@@ -152,7 +156,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterThinpoolMetadataUtilization
annotations:
-message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more than 80%
+message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more
+  than 80%
expr: |
gluster_thinpool_metadata_used_bytes{job="glusterd2-client"} / gluster_thinpool_metadata_total_bytes{job="glusterd2-client"} > 0.8
for: 5m
@@ -165,7 +170,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterThinpoolMetadataUtilization
annotations:
-message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more than 90%
+message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more
+  than 90%
expr: |
gluster_thinpool_metadata_used_bytes{job="glusterd2-client"} / gluster_thinpool_metadata_total_bytes{job="glusterd2-client"} > 0.9
for: 5m


@@ -38,7 +38,9 @@ alert: JaegerAgentHTTPServerErrs
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is experiencing {{ printf "%.2f" $value }}% HTTP errors.
-expr: 100 * sum(rate(jaeger_agent_http_server_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_agent_http_server_total[1m])) by (instance, job, namespace)> 1
+expr: 100 * sum(rate(jaeger_agent_http_server_errors_total[1m])) by (instance, job,
+  namespace) / sum(rate(jaeger_agent_http_server_total[1m])) by (instance, job, namespace)>
+  1
for: 15m
labels:
severity: warning
@@ -51,7 +53,9 @@ alert: JaegerClientSpansDropped
annotations:
message: |
service {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
-expr: 100 * sum(rate(jaeger_reporter_spans{result=~"dropped|err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_reporter_spans[1m])) by (instance, job, namespace)> 1
+expr: 100 * sum(rate(jaeger_reporter_spans{result=~"dropped|err"}[1m])) by (instance,
+  job, namespace) / sum(rate(jaeger_reporter_spans[1m])) by (instance, job, namespace)>
+  1
for: 15m
labels:
severity: warning
@@ -64,7 +68,9 @@ alert: JaegerAgentSpansDropped
annotations:
message: |
agent {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
-expr: 100 * sum(rate(jaeger_agent_reporter_batches_failures_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_agent_reporter_batches_submitted_total[1m])) by (instance, job, namespace)> 1
+expr: 100 * sum(rate(jaeger_agent_reporter_batches_failures_total[1m])) by (instance,
+  job, namespace) / sum(rate(jaeger_agent_reporter_batches_submitted_total[1m])) by
+  (instance, job, namespace)> 1
for: 15m
labels:
severity: warning
@@ -90,7 +96,9 @@ alert: JaegerCollectorDroppingSpans
annotations:
message: |
collector {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
-expr: 100 * sum(rate(jaeger_collector_spans_dropped_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_collector_spans_received_total[1m])) by (instance, job, namespace)> 1
+expr: 100 * sum(rate(jaeger_collector_spans_dropped_total[1m])) by (instance, job,
+  namespace) / sum(rate(jaeger_collector_spans_received_total[1m])) by (instance,
+  job, namespace)> 1
for: 15m
labels:
severity: warning
@@ -103,7 +111,9 @@ alert: JaegerSamplingUpdateFailing
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is failing {{ printf "%.2f" $value }}% in updating sampling policies.
-expr: 100 * sum(rate(jaeger_sampler_queries{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_sampler_queries[1m])) by (instance, job, namespace)> 1
+expr: 100 * sum(rate(jaeger_sampler_queries{result="err"}[1m])) by (instance, job,
+  namespace) / sum(rate(jaeger_sampler_queries[1m])) by (instance, job, namespace)>
+  1
for: 15m
labels:
severity: warning
@@ -116,7 +126,8 @@ alert: JaegerCollectorPersistenceSlow
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is slow at persisting spans.
-expr: histogram_quantile(0.99, sum by (le) (rate(jaeger_collector_save_latency_bucket[1m]))) > 0.5
+expr: histogram_quantile(0.99, sum by (le) (rate(jaeger_collector_save_latency_bucket[1m])))
+  > 0.5
for: 15m
labels:
severity: warning
@@ -129,7 +140,9 @@ alert: JaegerThrottlingUpdateFailing
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is failing {{ printf "%.2f" $value }}% in updating throttling policies.
-expr: 100 * sum(rate(jaeger_throttler_updates{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_throttler_updates[1m])) by (instance, job, namespace)> 1
+expr: 100 * sum(rate(jaeger_throttler_updates{result="err"}[1m])) by (instance, job,
+  namespace) / sum(rate(jaeger_throttler_updates[1m])) by (instance, job, namespace)>
+  1
for: 15m
labels:
severity: warning
@@ -142,7 +155,9 @@ alert: JaegerQueryReqsFailing
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
-expr: 100 * sum(rate(jaeger_query_requests_total{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_query_requests_total[1m])) by (instance, job, namespace)> 1
+expr: 100 * sum(rate(jaeger_query_requests_total{result="err"}[1m])) by (instance,
+  job, namespace) / sum(rate(jaeger_query_requests_total[1m])) by (instance, job,
+  namespace)> 1
for: 15m
labels:
severity: warning
@@ -155,7 +170,9 @@ alert: JaegerCassandraWritesFailing
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
-expr: 100 * sum(rate(jaeger_cassandra_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_cassandra_attempts_total[1m])) by (instance, job, namespace)> 1
+expr: 100 * sum(rate(jaeger_cassandra_errors_total[1m])) by (instance, job, namespace)
+  / sum(rate(jaeger_cassandra_attempts_total[1m])) by (instance, job, namespace)>
+  1
for: 15m
labels:
severity: warning
@@ -168,7 +185,9 @@ alert: JaegerCassandraReadsFailing
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
-expr: 100 * sum(rate(jaeger_cassandra_read_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_cassandra_read_attempts_total[1m])) by (instance, job, namespace)> 1
+expr: 100 * sum(rate(jaeger_cassandra_read_errors_total[1m])) by (instance, job, namespace)
+  / sum(rate(jaeger_cassandra_read_attempts_total[1m])) by (instance, job, namespace)>
+  1
for: 15m
labels:
severity: warning


@@ -23,7 +23,8 @@ Complete list of pregenerated alerts is available [here](https://github.com/moni
{{< code lang="yaml" >}}
alert: CockroachInstanceFlapping
annotations:
-message: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted {{ $value }} time(s) in 10m'
+message: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted {{
+  $value }} time(s) in 10m'
expr: |
resets(cockroachdb_sys_uptime{job="cockroachdb-public"}[10m]) > 5
for: 1m
@@ -64,7 +65,8 @@ labels:
{{< code lang="yaml" >}}
alert: CockroachStoreDiskLow
annotations:
-message: Store {{ $labels.store }} on node {{ $labels.instance }} at {{ $value }} available disk fraction
+message: Store {{ $labels.store }} on node {{ $labels.instance }} at {{ $value }}
+  available disk fraction
expr: |
:cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.15
for: 30m
@@ -116,7 +118,8 @@ labels:
{{< code lang="yaml" >}}
alert: CockroachHighOpenFDCount
annotations:
-message: 'Too many open file descriptors on {{ $labels.instance }}: {{ $value }} fraction used'
+message: 'Too many open file descriptors on {{ $labels.instance }}: {{ $value }}
+  fraction used'
expr: |
cockroachdb_sys_fd_open{job="cockroachdb-public"} / cockroachdb_sys_fd_softlimit{job="cockroachdb-public"} > 0.8
for: 10m


@@ -23,7 +23,10 @@ Complete list of pregenerated alerts is available [here](https://github.com/moni
{{< code lang="yaml" >}}
alert: KubeStateMetricsListErrors
annotations:
-message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+description: kube-state-metrics is experiencing errors at an elevated rate in list
+  operations. This is likely causing it to not be able to expose metrics about Kubernetes
+  objects correctly or at all.
+summary: kube-state-metrics is experiencing errors in list operations.
expr: |
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
/
@@ -39,7 +42,10 @@ labels:
{{< code lang="yaml" >}}
alert: KubeStateMetricsWatchErrors
annotations:
-message: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+description: kube-state-metrics is experiencing errors at an elevated rate in watch
+  operations. This is likely causing it to not be able to expose metrics about Kubernetes
+  objects correctly or at all.
+summary: kube-state-metrics is experiencing errors in watch operations.
expr: |
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
/


@@ -24,7 +24,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubePodCrashLooping
annotations:
-message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
+message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }})
+  is restarting {{ printf "%.2f" $value }} times / 5 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
expr: |
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
@@ -39,7 +40,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubePodNotReady
annotations:
-message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.
+message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state
+  for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
expr: |
sum by (namespace, pod) (
@@ -60,7 +62,9 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeDeploymentGenerationMismatch
annotations:
-message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.
+message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
+  }} does not match, this indicates that the Deployment has failed but has not been
+  rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
expr: |
kube_deployment_status_observed_generation{job="kube-state-metrics"}
@@ -77,7 +81,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeDeploymentReplicasMismatch
annotations:
-message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.
+message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched
+  the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
expr: |
(
@@ -100,7 +105,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeStatefulSetReplicasMismatch
annotations:
-message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.
+message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched
+  the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
expr: |
(
@@ -123,7 +129,9 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeStatefulSetGenerationMismatch
annotations:
-message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
+message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
+  }} does not match, this indicates that the StatefulSet has failed but has not
+  been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
expr: |
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
@@ -140,7 +148,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeStatefulSetUpdateNotRolledOut
annotations:
-message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.
+message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has
+  not been rolled out.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
expr: |
(
@@ -171,7 +180,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeDaemonSetRolloutStuck
annotations:
-message: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes.
+message: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished
+  or progressed for at least 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
expr: |
(
@@ -208,7 +218,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeContainerWaiting
annotations:
-message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.
+message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
+  has been in waiting state for longer than 1 hour.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
expr: |
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
@@ -223,7 +234,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeDaemonSetNotScheduled
annotations:
-message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.'
+message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
+  }} are not scheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
expr: |
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
@@ -240,7 +252,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeDaemonSetMisScheduled
annotations:
-message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.'
+message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
+  }} are running where they are not supposed to run.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
expr: |
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
@@ -255,7 +268,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeJobCompletion
annotations:
-message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than 12 hours to complete.
+message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than
+  12 hours to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
expr: |
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
@@ -285,7 +299,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeHpaReplicasMismatch
annotations:
-message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes.
+message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired
+  number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
expr: |
(kube_hpa_status_desired_replicas{job="kube-state-metrics"}
@@ -304,7 +319,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeHpaMaxedOut
annotations:
-message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes.
+message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas
+  for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
expr: |
kube_hpa_status_current_replicas{job="kube-state-metrics"}
@@ -323,7 +339,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeCPUOvercommit
annotations:
-message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
+message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate
+  node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
expr: |
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
@@ -342,7 +359,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeMemoryOvercommit
annotations:
-message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
+message: Cluster has overcommitted memory resource requests for Pods and cannot
+  tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
expr: |
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
@@ -399,7 +417,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeQuotaFullyUsed
annotations:
-message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
+message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
+  }} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused
expr: |
kube_resourcequota{job="kube-state-metrics", type="used"}
@@ -417,7 +436,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: CPUThrottlingHigh
annotations:
-message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
+message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace
+  }} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
expr: |
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
@@ -437,7 +457,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubePersistentVolumeFillingUp
annotations:
-message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
+message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in
+  Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
expr: |
kubelet_volume_stats_available_bytes{job="kubelet"}
@@ -455,7 +476,9 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubePersistentVolumeFillingUp
annotations:
-message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.
+message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
+  }} in Namespace {{ $labels.namespace }} is expected to fill up within four days.
+  Currently {{ $value | humanizePercentage }} is available.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
expr: |
(
@@ -476,7 +499,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubePersistentVolumeErrors
annotations:
-message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.
+message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase
+  }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
expr: |
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
@@ -493,7 +517,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeVersionMismatch
annotations:
-message: There are {{ $value }} different semantic versions of Kubernetes components running.
+message: There are {{ $value }} different semantic versions of Kubernetes components
+  running.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
expr: |
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*).*"))) > 1
@@ -508,7 +533,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeClientErrors
annotations:
-message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'
+message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}'
+  is experiencing {{ $value | humanizePercentage }} errors.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
expr: |
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
@@ -606,7 +632,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeClientCertificateExpiration
annotations:
-message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
+message: A client certificate used to authenticate to the apiserver is expiring
+  in less than 7.0 days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800
@@ -620,7 +647,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeClientCertificateExpiration
annotations:
-message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
+message: A client certificate used to authenticate to the apiserver is expiring
+  in less than 24.0 hours.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400
@@ -634,7 +662,9 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: AggregatedAPIErrors
annotations:
-message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
+message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported
+  errors. The number of errors have increased for it in the past five minutes. High
+  values indicate that the availability of the service changes too often.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
expr: |
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
@@ -648,7 +678,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: AggregatedAPIDown
annotations:
-message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 5m.
+message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only
+  {{ $value | humanize }}% available over the last 5m.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
expr: |
(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[5m]))) * 100 < 90
@@ -709,7 +740,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeletTooManyPods
annotations:
-message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.
+message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
+  }} of its Pod capacity.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
expr: |
count by(node) (
@@ -730,7 +762,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeNodeReadinessFlapping
annotations:
-message: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.
+message: The readiness status of node {{ $labels.node }} has changed {{ $value }}
+  times in the last 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
expr: |
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
@@ -745,7 +778,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeletPlegDurationHigh
annotations:
-message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.
+message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration
+  of {{ $value }} seconds on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
expr: |
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
@@ -760,7 +794,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeletPodStartUpLatencyHigh
annotations:
-message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.
+message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on
+  node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
expr: |
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet"} > 60


@@ -23,7 +23,8 @@ Complete list of pregenerated alerts is available [here](https://github.com/moni
{{< code lang="yaml" >}}
alert: NodeFilesystemSpaceFillingUp
annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
+  {{ printf "%.2f" $value }}% available space left and is filling up.
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |
(
@@ -43,7 +44,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemSpaceFillingUp
annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
+  {{ printf "%.2f" $value }}% available space left and is filling up fast.
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |
(
@@ -63,7 +65,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemAlmostOutOfSpace
annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
+  {{ printf "%.2f" $value }}% available space left.
summary: Filesystem has less than 5% space left.
expr: |
(
@@ -81,7 +84,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemAlmostOutOfSpace
annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
+  {{ printf "%.2f" $value }}% available space left.
summary: Filesystem has less than 3% space left.
expr: |
(
@@ -99,7 +103,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemFilesFillingUp
annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
+  {{ printf "%.2f" $value }}% available inodes left and is filling up.
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: |
(
@@ -119,7 +124,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemFilesFillingUp
annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
+  {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: |
(
@@ -139,7 +145,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemAlmostOutOfFiles
annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
+  {{ printf "%.2f" $value }}% available inodes left.
summary: Filesystem has less than 5% inodes left.
expr: |
(
@@ -157,7 +164,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemAlmostOutOfFiles
annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
+  {{ printf "%.2f" $value }}% available inodes left.
summary: Filesystem has less than 3% inodes left.
expr: |
(
@@ -175,7 +183,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeNetworkReceiveErrs
annotations:
-description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
+description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
+  {{ printf "%.0f" $value }} receive errors in the last two minutes.'
summary: Network interface is reporting many receive errors.
expr: |
increase(node_network_receive_errs_total[2m]) > 10
@@ -189,7 +198,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeNetworkTransmitErrs
annotations:
-description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
+description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
+  {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
summary: Network interface is reporting many transmit errors.
expr: |
increase(node_network_transmit_errs_total[2m]) > 10
@@ -229,7 +239,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeClockSkewDetected
annotations:
-message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
+message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure
+  NTP is configured correctly on this host.
summary: Clock skew detected.
expr: |
(
@@ -253,7 +264,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeClockNotSynchronising
annotations:
-message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
+message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured
+  on this host.
summary: Clock not synchronising.
expr: |
min_over_time(node_timex_sync_status[5m]) == 0

@ -35,13 +35,15 @@ labels:
{{< /code >}} {{< /code >}}
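All of the alerts on this page are ordinary Prometheus alerting rules, so the regenerated files can be loaded like any other rule file. A minimal prometheus.yml sketch, assuming an illustrative file path and the `job="prometheus"` self-scrape that the expressions below select on (the path and target address are assumptions, not part of the mixin):
{{< code lang="yaml" >}}
# Minimal sketch -- file path and target address are illustrative assumptions.
rule_files:
  - /etc/prometheus/rules/prometheus_alerts.yaml

scrape_configs:
  # The expressions on this page filter on job="prometheus".
  - job_name: prometheus
    static_configs:
      - targets: ['localhost:9090']
{{< /code >}}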
##### PrometheusNotificationQueueRunningFull ##### PrometheusNotificationQueueRunningFull
Prometheus alert notification queue predicted to run full in less than 30m.
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusNotificationQueueRunningFull alert: PrometheusNotificationQueueRunningFull
annotations: annotations:
description: Alert notification queue of Prometheus {{$labels.instance}} is running full. description: Alert notification queue of Prometheus {{$labels.instance}} is running
summary: Prometheus alert notification queue predicted to run full in less than 30m. full.
summary: Prometheus alert notification queue predicted to run full in less than
30m.
expr: | expr: |
# Without min_over_time, failed scrapes could create false negatives, see # Without min_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@ -56,14 +58,17 @@ labels:
{{< /code >}} {{< /code >}}
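The alert above is predictive; if you also want the current queue utilisation on a dashboard, a small recording rule over the same metrics can expose it as a ratio. A sketch, with an arbitrary group name and record name (both are assumptions, not part of the mixin):
{{< code lang="yaml" >}}
groups:
  - name: prometheus-notification-queue-extras  # group name is an assumption
    rules:
      # Fraction of the alert notification queue currently in use.
      - record: prometheus_notifications_queue:utilisation:ratio
        expr: |
          prometheus_notifications_queue_length{job="prometheus"}
          /
          prometheus_notifications_queue_capacity{job="prometheus"}
{{< /code >}}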
##### PrometheusErrorSendingAlertsToSomeAlertmanagers ##### PrometheusErrorSendingAlertsToSomeAlertmanagers
'{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.' '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus
Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
Prometheus has encountered more than 1% errors sending alerts to a specific
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusErrorSendingAlertsToSomeAlertmanagers alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations: annotations:
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.' description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager. {{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.'
summary: Prometheus has encountered more than 1% errors sending alerts to a specific
Alertmanager.
expr: | expr: |
( (
rate(prometheus_notifications_errors_total{job="prometheus"}[5m]) rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
@ -78,13 +83,14 @@ labels:
{{< /code >}} {{< /code >}}
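The alert above and the one that follows differ only in scope: the one above fires per failing Alertmanager, while PrometheusErrorSendingAlertsToAnyAlertmanager below fires only when sending is degraded towards every configured Alertmanager. Both assume Prometheus actually knows about more than one Alertmanager; a minimal prometheus.yml fragment for that (hostnames are placeholders):
{{< code lang="yaml" >}}
# prometheus.yml fragment -- hostnames and port are placeholders.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - alertmanager-0.example:9093
            - alertmanager-1.example:9093
{{< /code >}}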
##### PrometheusErrorSendingAlertsToAnyAlertmanager ##### PrometheusErrorSendingAlertsToAnyAlertmanager
'{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.instance}} to any Alertmanager.' '{{ printf "%.1f" $value }}% minimum errors while sending alerts from
Prometheus encounters more than 3% errors sending alerts to any Alertmanager. Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusErrorSendingAlertsToAnyAlertmanager alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations: annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.instance}} to any Alertmanager.' description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from
Prometheus {{$labels.instance}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: | expr: |
min without(alertmanager) ( min without(alertmanager) (
@ -120,7 +126,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusTSDBReloadsFailing alert: PrometheusTSDBReloadsFailing
annotations: annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}} reload failures over the last 3h. description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
reload failures over the last 3h.
summary: Prometheus has issues reloading blocks from disk. summary: Prometheus has issues reloading blocks from disk.
expr: | expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus"}[3h]) > 0 increase(prometheus_tsdb_reloads_failures_total{job="prometheus"}[3h]) > 0
@ -134,7 +141,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusTSDBCompactionsFailing alert: PrometheusTSDBCompactionsFailing
annotations: annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}} compaction failures over the last 3h. description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
compaction failures over the last 3h.
summary: Prometheus has issues compacting blocks. summary: Prometheus has issues compacting blocks.
expr: | expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus"}[3h]) > 0 increase(prometheus_tsdb_compactions_failed_total{job="prometheus"}[3h]) > 0
@ -162,7 +170,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusDuplicateTimestamps alert: PrometheusDuplicateTimestamps
annotations: annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp. description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }}
samples/s with different values but duplicated timestamp.
summary: Prometheus is dropping samples with duplicate timestamps. summary: Prometheus is dropping samples with duplicate timestamps.
expr: | expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus"}[5m]) > 0 rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus"}[5m]) > 0
@ -176,7 +185,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusOutOfOrderTimestamps alert: PrometheusOutOfOrderTimestamps
annotations: annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order. description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }}
samples/s with timestamps arriving out of order.
summary: Prometheus drops samples with out-of-order timestamps. summary: Prometheus drops samples with out-of-order timestamps.
expr: | expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus"}[5m]) > 0 rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus"}[5m]) > 0
@ -190,7 +200,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusRemoteStorageFailures alert: PrometheusRemoteStorageFailures
annotations: annotations:
description: Prometheus {{$labels.instance}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }} description: Prometheus {{$labels.instance}} failed to send {{ printf "%.1f" $value
}}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
summary: Prometheus fails to send samples to remote storage. summary: Prometheus fails to send samples to remote storage.
expr: | expr: |
( (
@ -214,7 +225,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusRemoteWriteBehind alert: PrometheusRemoteWriteBehind
annotations: annotations:
description: Prometheus {{$labels.instance}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}. description: Prometheus {{$labels.instance}} remote write is {{ printf "%.1f" $value
}}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
summary: Prometheus remote write is behind. summary: Prometheus remote write is behind.
expr: | expr: |
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
@ -235,8 +247,12 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusRemoteWriteDesiredShards alert: PrometheusRemoteWriteDesiredShards
annotations: annotations:
description: Prometheus {{$labels.instance}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus"}` $labels.instance | query | first | value }}. description: Prometheus {{$labels.instance}} remote write desired shards calculation
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url
}}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus"}`
$labels.instance | query | first | value }}.
summary: Prometheus remote write desired shards calculation wants to run more than
configured max shards.
expr: | expr: |
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@ -255,7 +271,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusRuleFailures alert: PrometheusRuleFailures
annotations: annotations:
description: Prometheus {{$labels.instance}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m. description: Prometheus {{$labels.instance}} has failed to evaluate {{ printf "%.0f"
$value }} rules in the last 5m.
summary: Prometheus is failing rule evaluations. summary: Prometheus is failing rule evaluations.
expr: | expr: |
increase(prometheus_rule_evaluation_failures_total{job="prometheus"}[5m]) > 0 increase(prometheus_rule_evaluation_failures_total{job="prometheus"}[5m]) > 0
@ -269,7 +286,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusMissingRuleEvaluations alert: PrometheusMissingRuleEvaluations
annotations: annotations:
description: Prometheus {{$labels.instance}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m. description: Prometheus {{$labels.instance}} has missed {{ printf "%.0f" $value
}} rule group evaluations in the last 5m.
summary: Prometheus is missing rule evaluations due to slow rule group evaluation. summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: | expr: |
increase(prometheus_rule_group_iterations_missed_total{job="prometheus"}[5m]) > 0 increase(prometheus_rule_group_iterations_missed_total{job="prometheus"}[5m]) > 0
@ -283,8 +301,10 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: PrometheusTargetLimitHit alert: PrometheusTargetLimitHit
annotations: annotations:
description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit. description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value
summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. }} targets because the number of targets exceeded the configured target_limit.
summary: Prometheus has dropped targets because some scrape configs have exceeded
the targets limit.
expr: | expr: |
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus"}[5m]) > 0 increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus"}[5m]) > 0
for: 15m for: 15m
@ -296,5 +316,5 @@ labels:
The following dashboards are generated from mixins and hosted on GitHub: The following dashboards are generated from mixins and hosted on GitHub:
- [prometheus-remote-write](https://github.com/monitoring-mixins/website/blob/master/assets/prometheus/dashboards/prometheus-remote-write.json)
- [prometheus](https://github.com/monitoring-mixins/website/blob/master/assets/prometheus/dashboards/prometheus.json) - [prometheus](https://github.com/monitoring-mixins/website/blob/master/assets/prometheus/dashboards/prometheus.json)
- [prometheus-remote-write](https://github.com/monitoring-mixins/website/blob/master/assets/prometheus/dashboards/prometheus-remote-write.json)
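If you use Grafana, the dashboard JSON files linked above can be served from disk with a file provisioning provider. A minimal sketch, where the provider name and both paths are assumptions:
{{< code lang="yaml" >}}
# /etc/grafana/provisioning/dashboards/prometheus-mixin.yaml -- paths are assumptions.
apiVersion: 1
providers:
  - name: prometheus-mixin
    type: file
    options:
      # Directory holding prometheus.json and prometheus-remote-write.json.
      path: /var/lib/grafana/dashboards/prometheus
{{< /code >}}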

@ -23,7 +23,8 @@ Complete list of pregenerated alerts is available [here](https://github.com/moni
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosCompactMultipleRunning alert: ThanosCompactMultipleRunning
annotations: annotations:
message: No more than one Thanos Compact instance should be running at once. There are {{ $value }} message: No more than one Thanos Compact instance should be running at once. There
are {{ $value }}
expr: sum(up{job=~"thanos-compact.*"}) > 1 expr: sum(up{job=~"thanos-compact.*"}) > 1
for: 5m for: 5m
labels: labels:
@ -47,7 +48,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosCompactHighCompactionFailures alert: ThanosCompactHighCompactionFailures
annotations: annotations:
message: Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize }}% of compactions. message: Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize
}}% of compactions.
expr: | expr: |
( (
sum by (job) (rate(thanos_compact_group_compactions_failures_total{job=~"thanos-compact.*"}[5m])) sum by (job) (rate(thanos_compact_group_compactions_failures_total{job=~"thanos-compact.*"}[5m]))
@ -65,7 +67,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosCompactBucketHighOperationFailures alert: ThanosCompactBucketHighOperationFailures
annotations: annotations:
message: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations. message: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value |
humanize }}% of operations.
expr: | expr: |
( (
sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-compact.*"}[5m])) sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-compact.*"}[5m]))
@ -84,7 +87,8 @@ labels:
alert: ThanosCompactHasNotRun alert: ThanosCompactHasNotRun
annotations: annotations:
message: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours. message: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours.
expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) / 60 / 60 > 24 expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h])))
/ 60 / 60 > 24
labels: labels:
severity: warning severity: warning
{{< /code >}} {{< /code >}}
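All of the Thanos alerts on this page match targets by job name (`job=~"thanos-compact.*"`, `job=~"thanos-query.*"`, and so on), so they only work if the scrape configuration uses job names of that shape. A sketch of matching scrape jobs (target addresses are placeholders; 10902 is the default Thanos HTTP port):
{{< code lang="yaml" >}}
# prometheus.yml fragment -- job names must match the alert selectors; targets are placeholders.
scrape_configs:
  - job_name: thanos-compact
    static_configs:
      - targets: ['thanos-compact.example:10902']
  - job_name: thanos-query
    static_configs:
      - targets: ['thanos-query.example:10902']
{{< /code >}}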
@ -96,7 +100,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosQueryHttpRequestQueryErrorRateHigh alert: ThanosQueryHttpRequestQueryErrorRateHigh
annotations: annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query" requests. message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize
}}% of "query" requests.
expr: | expr: |
( (
sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m])) sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m]))
@ -113,7 +118,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosQueryHttpRequestQueryRangeErrorRateHigh alert: ThanosQueryHttpRequestQueryRangeErrorRateHigh
annotations: annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query_range" requests. message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize
}}% of "query_range" requests.
expr: | expr: |
( (
sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m])) sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m]))
@ -130,7 +136,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosQueryGrpcServerErrorRate alert: ThanosQueryGrpcServerErrorRate
annotations: annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: | expr: |
( (
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*"}[5m])) sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*"}[5m]))
@ -148,7 +155,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosQueryGrpcClientErrorRate alert: ThanosQueryGrpcClientErrorRate
annotations: annotations:
message: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize }}% of requests. message: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize }}%
of requests.
expr: | expr: |
( (
sum by (job) (rate(grpc_client_handled_total{grpc_code!="OK", job=~"thanos-query.*"}[5m])) sum by (job) (rate(grpc_client_handled_total{grpc_code!="OK", job=~"thanos-query.*"}[5m]))
@ -165,7 +173,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosQueryHighDNSFailures alert: ThanosQueryHighDNSFailures
annotations: annotations:
message: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing DNS queries for store endpoints. message: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing DNS
queries for store endpoints.
expr: | expr: |
( (
sum by (job) (rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m])) sum by (job) (rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
@ -182,7 +191,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosQueryInstantLatencyHigh alert: ThanosQueryInstantLatencyHigh
annotations: annotations:
message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for instant queries. message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for instant queries.
expr: | expr: |
( (
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m]))) > 40 histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m]))) > 40
@ -199,7 +209,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosQueryRangeLatencyHigh alert: ThanosQueryRangeLatencyHigh
annotations: annotations:
message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for range queries. message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for range queries.
expr: | expr: |
( (
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query_range"}[5m]))) > 90 histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query_range"}[5m]))) > 90
@ -218,7 +229,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosReceiveHttpRequestErrorRateHigh alert: ThanosReceiveHttpRequestErrorRateHigh
annotations: annotations:
message: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. message: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: | expr: |
( (
sum(rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m])) sum(rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m]))
@ -235,7 +247,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosReceiveHttpRequestLatencyHigh alert: ThanosReceiveHttpRequestLatencyHigh
annotations: annotations:
message: Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for requests. message: Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for requests.
expr: | expr: |
( (
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-receive.*", handler="receive"}[5m]))) > 10 histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-receive.*", handler="receive"}[5m]))) > 10
@ -252,7 +265,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosReceiveHighReplicationFailures alert: ThanosReceiveHighReplicationFailures
annotations: annotations:
message: Thanos Receive {{$labels.job}} is failing to replicate {{ $value | humanize }}% of requests. message: Thanos Receive {{$labels.job}} is failing to replicate {{ $value | humanize
}}% of requests.
expr: | expr: |
thanos_receive_replication_factor > 1 thanos_receive_replication_factor > 1
and and
@ -279,7 +293,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosReceiveHighForwardRequestFailures alert: ThanosReceiveHighForwardRequestFailures
annotations: annotations:
message: Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize }}% of requests. message: Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize
}}% of requests.
expr: | expr: |
( (
sum by (job) (rate(thanos_receive_forward_requests_total{result="error", job=~"thanos-receive.*"}[5m])) sum by (job) (rate(thanos_receive_forward_requests_total{result="error", job=~"thanos-receive.*"}[5m]))
@ -296,7 +311,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosReceiveHighHashringFileRefreshFailures alert: ThanosReceiveHighHashringFileRefreshFailures
annotations: annotations:
message: Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{ $value | humanize }} of attempts failed. message: Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{
$value | humanize }} of attempts failed.
expr: | expr: |
( (
sum by (job) (rate(thanos_receive_hashrings_file_errors_total{job=~"thanos-receive.*"}[5m])) sum by (job) (rate(thanos_receive_hashrings_file_errors_total{job=~"thanos-receive.*"}[5m]))
@ -315,7 +331,8 @@ labels:
alert: ThanosReceiveConfigReloadFailure alert: ThanosReceiveConfigReloadFailure
annotations: annotations:
message: Thanos Receive {{$labels.job}} has not been able to reload hashring configurations. message: Thanos Receive {{$labels.job}} has not been able to reload hashring configurations.
expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) by (job) != 1 expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) by
(job) != 1
for: 5m for: 5m
labels: labels:
severity: warning severity: warning
@ -326,7 +343,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosReceiveNoUpload alert: ThanosReceiveNoUpload
annotations: annotations:
message: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded latest data to object storage. message: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded
latest data to object storage.
expr: | expr: |
(up{job=~"thanos-receive.*"} - 1) (up{job=~"thanos-receive.*"} - 1)
+ on (instance) # filters to only alert on current instance last 3h + on (instance) # filters to only alert on current instance last 3h
@ -356,7 +374,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosSidecarUnhealthy alert: ThanosSidecarUnhealthy
annotations: annotations:
message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{ $value }} seconds. message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{ $value
}} seconds.
expr: | expr: |
time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"}) by (job, pod) >= 600 time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"}) by (job, pod) >= 600
labels: labels:
@ -370,7 +389,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosStoreGrpcErrorRate alert: ThanosStoreGrpcErrorRate
annotations: annotations:
message: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. message: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: | expr: |
( (
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*"}[5m])) sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*"}[5m]))
@ -388,7 +408,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosStoreSeriesGateLatencyHigh alert: ThanosStoreSeriesGateLatencyHigh
annotations: annotations:
message: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for store series gate requests. message: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for store series gate requests.
expr: | expr: |
( (
histogram_quantile(0.9, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2 histogram_quantile(0.9, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2
@ -405,7 +426,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosStoreBucketHighOperationFailures alert: ThanosStoreBucketHighOperationFailures
annotations: annotations:
message: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations. message: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value | humanize
}}% of operations.
expr: | expr: |
( (
sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-store.*"}[5m])) sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-store.*"}[5m]))
@ -423,7 +445,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosStoreObjstoreOperationLatencyHigh alert: ThanosStoreObjstoreOperationLatencyHigh
annotations: annotations:
message: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of {{ $value }} seconds for the bucket operations. message: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of {{
$value }} seconds for the bucket operations.
expr: | expr: |
( (
histogram_quantile(0.9, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2 histogram_quantile(0.9, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2
@ -452,12 +475,13 @@ labels:
{{< /code >}} {{< /code >}}
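The Thanos Rule alerts that follow, like the rest of this page, only attach a severity label (info, warning or critical); turning that into paging versus ticketing is left to Alertmanager routing. A minimal alertmanager.yml sketch in which the receiver names are assumptions and the notification integrations are omitted:
{{< code lang="yaml" >}}
# alertmanager.yml fragment -- receiver names are assumptions; integrations omitted.
route:
  receiver: team-tickets
  routes:
    - match:
        severity: critical
      receiver: team-pager
receivers:
  - name: team-tickets
  - name: team-pager
{{< /code >}}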
##### ThanosRuleSenderIsFailingAlerts ##### ThanosRuleSenderIsFailingAlerts
Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts to alertmanager.
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosRuleSenderIsFailingAlerts alert: ThanosRuleSenderIsFailingAlerts
annotations: annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts to alertmanager. message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts to
alertmanager.
expr: | expr: |
sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0
for: 5m for: 5m
@ -488,7 +512,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosRuleHighRuleEvaluationWarnings alert: ThanosRuleHighRuleEvaluationWarnings
annotations: annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation warnings. message: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation
warnings.
expr: | expr: |
sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0 sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0
for: 15m for: 15m
@ -501,7 +526,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosRuleRuleEvaluationLatencyHigh alert: ThanosRuleRuleEvaluationLatencyHigh
annotations: annotations:
message: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency than interval for {{$labels.rule_group}}. message: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency
than interval for {{$labels.rule_group}}.
expr: | expr: |
( (
sum by (job, pod, rule_group) (prometheus_rule_group_last_duration_seconds{job=~"thanos-rule.*"}) sum by (job, pod, rule_group) (prometheus_rule_group_last_duration_seconds{job=~"thanos-rule.*"})
@ -518,7 +544,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosRuleGrpcErrorRate alert: ThanosRuleGrpcErrorRate
annotations: annotations:
message: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. message: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize }}%
of requests.
expr: | expr: |
( (
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m])) sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m]))
@ -537,7 +564,8 @@ labels:
alert: ThanosRuleConfigReloadFailure alert: ThanosRuleConfigReloadFailure
annotations: annotations:
message: Thanos Rule {{$labels.job}} has not been able to reload its configuration. message: Thanos Rule {{$labels.job}} has not been able to reload its configuration.
expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by (job) != 1 expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by (job)
!= 1
for: 5m for: 5m
labels: labels:
severity: info severity: info
@ -548,7 +576,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosRuleQueryHighDNSFailures alert: ThanosRuleQueryHighDNSFailures
annotations: annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for query endpoints. message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS
queries for query endpoints.
expr: | expr: |
( (
sum by (job) (rate(thanos_ruler_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m])) sum by (job) (rate(thanos_ruler_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m]))
@ -566,7 +595,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosRuleAlertmanagerHighDNSFailures alert: ThanosRuleAlertmanagerHighDNSFailures
annotations: annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for Alertmanager endpoints. message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS
queries for Alertmanager endpoints.
expr: | expr: |
( (
sum by (job) (rate(thanos_ruler_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m])) sum by (job) (rate(thanos_ruler_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m]))
@ -584,7 +614,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosRuleNoEvaluationFor10Intervals alert: ThanosRuleNoEvaluationFor10Intervals
annotations: annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups that did not evaluate for at least 10x of their expected interval. message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups that
did not evaluate for at least 10x of their expected interval.
expr: | expr: |
time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"}) time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"})
> >
@ -599,7 +630,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosNoRuleEvaluations alert: ThanosNoRuleEvaluations
annotations: annotations:
message: Thanos Rule {{$labels.job}} did not perform any rule evaluations in the past 2 minutes. message: Thanos Rule {{$labels.job}} did not perform any rule evaluations in the
past 2 minutes.
expr: | expr: |
sum(rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[2m])) <= 0 sum(rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[2m])) <= 0
and and
@ -726,7 +758,8 @@ labels:
{{< code lang="yaml" >}} {{< code lang="yaml" >}}
alert: ThanosBucketReplicateRunLatency alert: ThanosBucketReplicateRunLatency
annotations: annotations:
message: Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for the replicate operations. message: Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for the replicate operations.
expr: | expr: |
( (
histogram_quantile(0.9, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20 histogram_quantile(0.9, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20