
assets,site/content: regenerate

paulfantom 2020-08-13 12:50:10 +02:00
parent df43594957
commit 7fd2bee5a7
No known key found for this signature in database
GPG key ID: 12AE0185401674E7
25 changed files with 1134 additions and 535 deletions
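
Every hunk below shows the same mechanical change: the regenerated YAML re-emits long annotation strings and recording-rule expressions as folded multi-line scalars, wrapped at roughly 80 columns, instead of single long lines. Below is a minimal sketch of that folding behaviour, assuming a PyYAML-style dumper purely for illustration (the actual mixin pipeline may use a different YAML serializer, and the rule shown is a hypothetical cut-down example):

# Illustration only: a YAML dumper with a default line width of ~80 columns
# folds a long scalar onto an indented continuation line, which is the
# rewrapping visible in every hunk of this diff.
import yaml

rule = {
    "alert": "CephNodeDown",
    "annotations": {
        "description": "Storage node {{ $labels.node }} went down. "
                       "Please check the node immediately.",
    },
}

# safe_dump breaks the long description at whitespace near column 80.
print(yaml.safe_dump(rule, default_flow_style=False))

The description is folded onto an indented continuation line, matching the two-line description entries in the Ceph hunks that follow; the content of the rules is unchanged, only the line layout differs.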


@@ -27,7 +27,8 @@ groups:
rules:
- alert: CephMdsMissingReplicas
annotations:
description: Minimum required replicas for storage metadata service not available. Might affect the working of storage cluster.
description: Minimum required replicas for storage metadata service not available.
Might affect the working of storage cluster.
message: Insufficient replicas for storage metadata service.
severity_level: warning
storage_type: ceph
@@ -51,7 +52,8 @@ groups:
severity: critical
- alert: CephMonHighNumberOfLeaderChanges
annotations:
description: Ceph Monitor {{ $labels.ceph_daemon }} on host {{ $labels.hostname }} has seen {{ $value | printf "%.2f" }} leader changes per minute recently.
description: Ceph Monitor {{ $labels.ceph_daemon }} on host {{ $labels.hostname
}} has seen {{ $value | printf "%.2f" }} leader changes per minute recently.
message: Storage Cluster has seen many leader changes recently.
severity_level: warning
storage_type: ceph
@@ -64,7 +66,8 @@ groups:
rules:
- alert: CephNodeDown
annotations:
description: Storage node {{ $labels.node }} went down. Please check the node immediately.
description: Storage node {{ $labels.node }} went down. Please check the node
immediately.
message: Storage node {{ $labels.node }} went down
severity_level: error
storage_type: ceph
@@ -77,7 +80,9 @@ groups:
rules:
- alert: CephOSDCriticallyFull
annotations:
description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has crossed 85% on host {{ $labels.hostname }}. Immediately free up some space or expand the storage cluster or contact support.
description: Utilization of back-end storage device {{ $labels.ceph_daemon }}
has crossed 85% on host {{ $labels.hostname }}. Immediately free up some space
or expand the storage cluster or contact support.
message: Back-end storage device is critically full.
severity_level: error
storage_type: ceph
@@ -88,7 +93,9 @@ groups:
severity: critical
- alert: CephOSDNearFull
annotations:
description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has crossed 75% on host {{ $labels.hostname }}. Free up some space or expand the storage cluster or contact support.
description: Utilization of back-end storage device {{ $labels.ceph_daemon }}
has crossed 75% on host {{ $labels.hostname }}. Free up some space or expand
the storage cluster or contact support.
message: Back-end storage device is nearing full.
severity_level: warning
storage_type: ceph
@@ -99,7 +106,8 @@ groups:
severity: warning
- alert: CephOSDDiskNotResponding
annotations:
description: Disk device {{ $labels.device }} not responding, on host {{ $labels.host }}.
description: Disk device {{ $labels.device }} not responding, on host {{ $labels.host
}}.
message: Disk not responding
severity_level: error
storage_type: ceph
@@ -110,7 +118,8 @@ groups:
severity: critical
- alert: CephOSDDiskUnavailable
annotations:
description: Disk device {{ $labels.device }} not accessible on host {{ $labels.host }}.
description: Disk device {{ $labels.device }} not accessible on host {{ $labels.host
}}.
message: Disk not accessible
severity_level: error
storage_type: ceph
@@ -145,8 +154,10 @@ groups:
rules:
- alert: PersistentVolumeUsageNearFull
annotations:
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 75%. Free up some space or expand the PVC.
message: PVC {{ $labels.persistentvolumeclaim }} is nearing full. Data deletion or PVC expansion is required.
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed
75%. Free up some space or expand the PVC.
message: PVC {{ $labels.persistentvolumeclaim }} is nearing full. Data deletion
or PVC expansion is required.
severity_level: warning
storage_type: ceph
expr: |
@@ -156,8 +167,10 @@ groups:
severity: warning
- alert: PersistentVolumeUsageCritical
annotations:
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 85%. Free up some space or expand the PVC immediately.
message: PVC {{ $labels.persistentvolumeclaim }} is critically full. Data deletion or PVC expansion is required.
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed
85%. Free up some space or expand the PVC immediately.
message: PVC {{ $labels.persistentvolumeclaim }} is critically full. Data deletion
or PVC expansion is required.
severity_level: error
storage_type: ceph
expr: |
@@ -191,7 +204,8 @@ groups:
severity: warning
- alert: CephOSDVersionMismatch
annotations:
description: There are {{ $value }} different versions of Ceph OSD components running.
description: There are {{ $value }} different versions of Ceph OSD components
running.
message: There are multiple versions of storage services running.
severity_level: warning
storage_type: ceph
@@ -202,7 +216,8 @@ groups:
severity: warning
- alert: CephMonVersionMismatch
annotations:
description: There are {{ $value }} different versions of Ceph Mon components running.
description: There are {{ $value }} different versions of Ceph Mon components
running.
message: There are multiple versions of storage services running.
severity_level: warning
storage_type: ceph
@@ -215,8 +230,10 @@ groups:
rules:
- alert: CephClusterNearFull
annotations:
description: Storage cluster utilization has crossed 75% and will become read-only at 85%. Free up some space or expand the storage cluster.
message: Storage cluster is nearing full. Data deletion or cluster expansion is required.
description: Storage cluster utilization has crossed 75% and will become read-only
at 85%. Free up some space or expand the storage cluster.
message: Storage cluster is nearing full. Data deletion or cluster expansion
is required.
severity_level: warning
storage_type: ceph
expr: |
@@ -226,8 +243,10 @@ groups:
severity: warning
- alert: CephClusterCriticallyFull
annotations:
description: Storage cluster utilization has crossed 80% and will become read-only at 85%. Free up some space or expand the storage cluster immediately.
message: Storage cluster is critically full and needs immediate data deletion or cluster expansion.
description: Storage cluster utilization has crossed 80% and will become read-only
at 85%. Free up some space or expand the storage cluster immediately.
message: Storage cluster is critically full and needs immediate data deletion
or cluster expansion.
severity_level: error
storage_type: ceph
expr: |
@@ -237,8 +256,10 @@ groups:
severity: critical
- alert: CephClusterReadOnly
annotations:
description: Storage cluster utilization has crossed 85% and will become read-only now. Free up some space or expand the storage cluster immediately.
message: Storage cluster is read-only now and needs immediate data deletion or cluster expansion.
description: Storage cluster utilization has crossed 85% and will become read-only
now. Free up some space or expand the storage cluster immediately.
message: Storage cluster is read-only now and needs immediate data deletion
or cluster expansion.
severity_level: error
storage_type: ceph
expr: |


@@ -12,7 +12,8 @@ groups:
severity: critical
- alert: CoreDNSLatencyHigh
annotations:
message: CoreDNS has 99th percentile latency of {{ $value }} seconds for server {{ $labels.server }} zone {{ $labels.zone }} .
message: CoreDNS has 99th percentile latency of {{ $value }} seconds for server
{{ $labels.server }} zone {{ $labels.zone }} .
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednslatencyhigh
expr: |
histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job="kube-dns"}[5m])) by(server, zone, le)) > 4
@@ -21,7 +22,8 @@ groups:
severity: critical
- alert: CoreDNSErrorsHigh
annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests.
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
of requests.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
expr: |
sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@@ -32,7 +34,8 @@ groups:
severity: critical
- alert: CoreDNSErrorsHigh
annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests.
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
of requests.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
expr: |
sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@@ -45,7 +48,8 @@ groups:
rules:
- alert: CoreDNSForwardLatencyHigh
annotations:
message: CoreDNS has 99th percentile latency of {{ $value }} seconds forwarding requests to {{ $labels.to }}.
message: CoreDNS has 99th percentile latency of {{ $value }} seconds forwarding
requests to {{ $labels.to }}.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardlatencyhigh
expr: |
histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{job="kube-dns"}[5m])) by(to, le)) > 4
@@ -54,7 +58,8 @@ groups:
severity: critical
- alert: CoreDNSForwardErrorsHigh
annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of forward requests to {{ $labels.to }}.
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
of forward requests to {{ $labels.to }}.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
expr: |
sum(rate(coredns_forward_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@@ -65,7 +70,8 @@ groups:
severity: critical
- alert: CoreDNSForwardErrorsHigh
annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of forward requests to {{ $labels.to }}.
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }}
of forward requests to {{ $labels.to }}.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
expr: |
sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))


@@ -107,7 +107,8 @@ groups:
severity: warning
- alert: CortexIngesterRestarts
annotations:
message: '{{ $labels.job }}/{{ $labels.instance }} has restarted {{ printf "%.2f" $value }} times in the last 30 mins.'
message: '{{ $labels.job }}/{{ $labels.instance }} has restarted {{ printf "%.2f"
$value }} times in the last 30 mins.'
expr: |
changes(process_start_time_seconds{job=~".+(cortex|ingester)"}[30m]) > 1
labels:
@@ -278,7 +279,8 @@ groups:
rules:
- alert: CortexGossipMembersMismatch
annotations:
message: '{{ $labels.job }}/{{ $labels.instance }} sees incorrect number of gossip members.'
message: '{{ $labels.job }}/{{ $labels.instance }} sees incorrect number of
gossip members.'
expr: |
memberlist_client_cluster_members_count
!= on (cluster, namespace) group_left
@@ -290,7 +292,8 @@ groups:
rules:
- alert: CortexIngesterHasNotShippedBlocks
annotations:
message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} has not shipped any block in the last 4 hours.
message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} has
not shipped any block in the last 4 hours.
expr: |
(min by(namespace, instance) (time() - thanos_objstore_bucket_last_successful_upload_time{job=~".+/ingester"}) > 60 * 60 * 4)
and
@@ -302,7 +305,8 @@ groups:
severity: critical
- alert: CortexIngesterHasNotShippedBlocksSinceStart
annotations:
message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} has not shipped any block in the last 4 hours.
message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} has
not shipped any block in the last 4 hours.
expr: |
(max by(namespace, instance) (thanos_objstore_bucket_last_successful_upload_time{job=~".+/ingester"}) == 0)
and
@@ -312,7 +316,8 @@ groups:
severity: critical
- alert: CortexIngesterTSDBHeadCompactionFailed
annotations:
message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} is failing to compact TSDB head.
message: Cortex Ingester {{ $labels.namespace }}/{{ $labels.instance }} is failing
to compact TSDB head.
expr: |
rate(cortex_ingester_tsdb_compactions_failed_total[5m]) > 0
for: 15m
@@ -320,7 +325,8 @@ groups:
severity: critical
- alert: CortexQuerierHasNotScanTheBucket
annotations:
message: Cortex Querier {{ $labels.namespace }}/{{ $labels.instance }} has not successfully scanned the bucket since {{ $value | humanizeDuration }}.
message: Cortex Querier {{ $labels.namespace }}/{{ $labels.instance }} has not
successfully scanned the bucket since {{ $value | humanizeDuration }}.
expr: |
(time() - cortex_querier_blocks_last_successful_scan_timestamp_seconds > 60 * 30)
and
@@ -330,7 +336,9 @@ groups:
severity: critical
- alert: CortexQuerierHighRefetchRate
annotations:
message: Cortex Queries in {{ $labels.namespace }} are refetching series from different store-gateways (because of missing blocks) for the {{ printf "%.0f" $value }}% of queries.
message: Cortex Queries in {{ $labels.namespace }} are refetching series from
different store-gateways (because of missing blocks) for the {{ printf "%.0f"
$value }}% of queries.
expr: |
100 * (
(
@@ -347,7 +355,9 @@ groups:
severity: warning
- alert: CortexStoreGatewayHasNotSyncTheBucket
annotations:
message: Cortex Store Gateway {{ $labels.namespace }}/{{ $labels.instance }} has not successfully synched the bucket since {{ $value | humanizeDuration }}.
message: Cortex Store Gateway {{ $labels.namespace }}/{{ $labels.instance }}
has not successfully synched the bucket since {{ $value | humanizeDuration
}}.
expr: |
(time() - cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="store-gateway"} > 60 * 30)
and
@@ -359,7 +369,8 @@ groups:
rules:
- alert: CortexCompactorHasNotSuccessfullyCleanedUpBlocks
annotations:
message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has not successfully cleaned up blocks in the last 24 hours.
message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has
not successfully cleaned up blocks in the last 24 hours.
expr: |
(time() - cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds > 60 * 60 * 24)
and
@@ -369,7 +380,8 @@ groups:
severity: critical
- alert: CortexCompactorHasNotSuccessfullyCleanedUpBlocksSinceStart
annotations:
message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has not successfully cleaned up blocks in the last 24 hours.
message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has
not successfully cleaned up blocks in the last 24 hours.
expr: |
cortex_compactor_block_cleanup_last_successful_run_timestamp_seconds == 0
for: 24h
@@ -377,7 +389,8 @@ groups:
severity: critical
- alert: CortexCompactorHasNotUploadedBlocks
annotations:
message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has not uploaded any block in the last 24 hours.
message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has
not uploaded any block in the last 24 hours.
expr: |
(time() - thanos_objstore_bucket_last_successful_upload_time{job=~".+/compactor"} > 60 * 60 * 24)
and
@@ -387,7 +400,8 @@ groups:
severity: critical
- alert: CortexCompactorHasNotUploadedBlocksSinceStart
annotations:
message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has not uploaded any block in the last 24 hours.
message: Cortex Compactor {{ $labels.namespace }}/{{ $labels.instance }} has
not uploaded any block in the last 24 hours.
expr: |
thanos_objstore_bucket_last_successful_upload_time{job=~".+/compactor"} == 0
for: 24h


@@ -1,11 +1,14 @@
groups:
- name: cortex_api
rules:
- expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_request_duration_seconds:50quantile
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_request_duration_seconds_count[1m]))
by (cluster, job)
record: cluster_job:cortex_request_duration_seconds:avg
- expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job)
record: cluster_job:cortex_request_duration_seconds_bucket:sum_rate
@@ -13,185 +16,279 @@ groups:
record: cluster_job:cortex_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job)
record: cluster_job:cortex_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route))
- expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m]))
by (le, cluster, job, route))
record: cluster_job_route:cortex_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route))
- expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m]))
by (le, cluster, job, route))
record: cluster_job_route:cortex_request_duration_seconds:50quantile
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route)
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route)
/ sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route)
record: cluster_job_route:cortex_request_duration_seconds:avg
- expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job, route)
- expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, job,
route)
record: cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, job, route)
record: cluster_job_route:cortex_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, job, route)
record: cluster_job_route:cortex_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))
- expr: histogram_quantile(0.99, sum(rate(cortex_request_duration_seconds_bucket[1m]))
by (le, cluster, namespace, job, route))
record: cluster_namespace_job_route:cortex_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route))
- expr: histogram_quantile(0.50, sum(rate(cortex_request_duration_seconds_bucket[1m]))
by (le, cluster, namespace, job, route))
record: cluster_namespace_job_route:cortex_request_duration_seconds:50quantile
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace,
job, route) / sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster,
namespace, job, route)
record: cluster_namespace_job_route:cortex_request_duration_seconds:avg
- expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, job, route)
- expr: sum(rate(cortex_request_duration_seconds_bucket[1m])) by (le, cluster, namespace,
job, route)
record: cluster_namespace_job_route:cortex_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace, job, route)
- expr: sum(rate(cortex_request_duration_seconds_sum[1m])) by (cluster, namespace,
job, route)
record: cluster_namespace_job_route:cortex_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace, job, route)
- expr: sum(rate(cortex_request_duration_seconds_count[1m])) by (cluster, namespace,
job, route)
record: cluster_namespace_job_route:cortex_request_duration_seconds_count:sum_rate
- name: cortex_cache
rules:
- expr: histogram_quantile(0.99, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
- expr: histogram_quantile(0.99, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m]))
by (le, cluster, job, method))
record: cluster_job_method:cortex_memcache_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
- expr: histogram_quantile(0.50, sum(rate(cortex_memcache_request_duration_seconds_bucket[1m]))
by (le, cluster, job, method))
record: cluster_job_method:cortex_memcache_request_duration_seconds:50quantile
- expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster, job, method) / sum(rate(cortex_memcache_request_duration_seconds_count[1m])) by (cluster, job, method)
- expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster,
job, method) / sum(rate(cortex_memcache_request_duration_seconds_count[1m]))
by (cluster, job, method)
record: cluster_job_method:cortex_memcache_request_duration_seconds:avg
- expr: sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)
- expr: sum(rate(cortex_memcache_request_duration_seconds_bucket[1m])) by (le, cluster,
job, method)
record: cluster_job_method:cortex_memcache_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster, job, method)
- expr: sum(rate(cortex_memcache_request_duration_seconds_sum[1m])) by (cluster,
job, method)
record: cluster_job_method:cortex_memcache_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_memcache_request_duration_seconds_count[1m])) by (cluster, job, method)
- expr: sum(rate(cortex_memcache_request_duration_seconds_count[1m])) by (cluster,
job, method)
record: cluster_job_method:cortex_memcache_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_cache_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_cache_request_duration_seconds:50quantile
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job)
/ sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job)
record: cluster_job:cortex_cache_request_duration_seconds:avg
- expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job)
- expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster,
job)
record: cluster_job:cortex_cache_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job)
record: cluster_job:cortex_cache_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster,
job)
record: cluster_job:cortex_cache_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
- expr: histogram_quantile(0.99, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
by (le, cluster, job, method))
record: cluster_job_method:cortex_cache_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
- expr: histogram_quantile(0.50, sum(rate(cortex_cache_request_duration_seconds_bucket[1m]))
by (le, cluster, job, method))
record: cluster_job_method:cortex_cache_request_duration_seconds:50quantile
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job, method) / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job, method)
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job,
method) / sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster,
job, method)
record: cluster_job_method:cortex_cache_request_duration_seconds:avg
- expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)
- expr: sum(rate(cortex_cache_request_duration_seconds_bucket[1m])) by (le, cluster,
job, method)
record: cluster_job_method:cortex_cache_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job, method)
- expr: sum(rate(cortex_cache_request_duration_seconds_sum[1m])) by (cluster, job,
method)
record: cluster_job_method:cortex_cache_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster, job, method)
- expr: sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (cluster,
job, method)
record: cluster_job_method:cortex_cache_request_duration_seconds_count:sum_rate
- name: cortex_storage
rules:
- expr: histogram_quantile(0.99, sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
- expr: histogram_quantile(0.99, sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m]))
by (le, cluster, job, operation))
record: cluster_job_operation:cortex_bigtable_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
- expr: histogram_quantile(0.50, sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m]))
by (le, cluster, job, operation))
record: cluster_job_operation:cortex_bigtable_request_duration_seconds:50quantile
- expr: sum(rate(cortex_bigtable_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_bigtable_request_duration_seconds_count[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_bigtable_request_duration_seconds_sum[1m])) by (cluster,
job, operation) / sum(rate(cortex_bigtable_request_duration_seconds_count[1m]))
by (cluster, job, operation)
record: cluster_job_operation:cortex_bigtable_request_duration_seconds:avg
- expr: sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)
- expr: sum(rate(cortex_bigtable_request_duration_seconds_bucket[1m])) by (le, cluster,
job, operation)
record: cluster_job_operation:cortex_bigtable_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_bigtable_request_duration_seconds_sum[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_bigtable_request_duration_seconds_sum[1m])) by (cluster,
job, operation)
record: cluster_job_operation:cortex_bigtable_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_bigtable_request_duration_seconds_count[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_bigtable_request_duration_seconds_count[1m])) by (cluster,
job, operation)
record: cluster_job_operation:cortex_bigtable_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
- expr: histogram_quantile(0.99, sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m]))
by (le, cluster, job, operation))
record: cluster_job_operation:cortex_cassandra_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
- expr: histogram_quantile(0.50, sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m]))
by (le, cluster, job, operation))
record: cluster_job_operation:cortex_cassandra_request_duration_seconds:50quantile
- expr: sum(rate(cortex_cassandra_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_cassandra_request_duration_seconds_count[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_cassandra_request_duration_seconds_sum[1m])) by (cluster,
job, operation) / sum(rate(cortex_cassandra_request_duration_seconds_count[1m]))
by (cluster, job, operation)
record: cluster_job_operation:cortex_cassandra_request_duration_seconds:avg
- expr: sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)
- expr: sum(rate(cortex_cassandra_request_duration_seconds_bucket[1m])) by (le,
cluster, job, operation)
record: cluster_job_operation:cortex_cassandra_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_cassandra_request_duration_seconds_sum[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_cassandra_request_duration_seconds_sum[1m])) by (cluster,
job, operation)
record: cluster_job_operation:cortex_cassandra_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_cassandra_request_duration_seconds_count[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_cassandra_request_duration_seconds_count[1m])) by (cluster,
job, operation)
record: cluster_job_operation:cortex_cassandra_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
- expr: histogram_quantile(0.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m]))
by (le, cluster, job, operation))
record: cluster_job_operation:cortex_dynamo_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
- expr: histogram_quantile(0.50, sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m]))
by (le, cluster, job, operation))
record: cluster_job_operation:cortex_dynamo_request_duration_seconds:50quantile
- expr: sum(rate(cortex_dynamo_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_dynamo_request_duration_seconds_count[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_dynamo_request_duration_seconds_sum[1m])) by (cluster, job,
operation) / sum(rate(cortex_dynamo_request_duration_seconds_count[1m])) by
(cluster, job, operation)
record: cluster_job_operation:cortex_dynamo_request_duration_seconds:avg
- expr: sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)
- expr: sum(rate(cortex_dynamo_request_duration_seconds_bucket[1m])) by (le, cluster,
job, operation)
record: cluster_job_operation:cortex_dynamo_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_dynamo_request_duration_seconds_sum[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_dynamo_request_duration_seconds_sum[1m])) by (cluster, job,
operation)
record: cluster_job_operation:cortex_dynamo_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_dynamo_request_duration_seconds_count[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_dynamo_request_duration_seconds_count[1m])) by (cluster,
job, operation)
record: cluster_job_operation:cortex_dynamo_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_chunk_store_index_lookups_per_query:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_chunk_store_index_lookups_per_query:50quantile
- expr: sum(rate(cortex_chunk_store_index_lookups_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_index_lookups_per_query_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_chunk_store_index_lookups_per_query_sum[1m])) by (cluster,
job) / sum(rate(cortex_chunk_store_index_lookups_per_query_count[1m])) by (cluster,
job)
record: cluster_job:cortex_chunk_store_index_lookups_per_query:avg
- expr: sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le, cluster, job)
- expr: sum(rate(cortex_chunk_store_index_lookups_per_query_bucket[1m])) by (le,
cluster, job)
record: cluster_job:cortex_chunk_store_index_lookups_per_query_bucket:sum_rate
- expr: sum(rate(cortex_chunk_store_index_lookups_per_query_sum[1m])) by (cluster, job)
- expr: sum(rate(cortex_chunk_store_index_lookups_per_query_sum[1m])) by (cluster,
job)
record: cluster_job:cortex_chunk_store_index_lookups_per_query_sum:sum_rate
- expr: sum(rate(cortex_chunk_store_index_lookups_per_query_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_chunk_store_index_lookups_per_query_count[1m])) by (cluster,
job)
record: cluster_job:cortex_chunk_store_index_lookups_per_query_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query:50quantile
- expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_series_pre_intersection_per_query_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_sum[1m]))
by (cluster, job) / sum(rate(cortex_chunk_store_series_pre_intersection_per_query_count[1m]))
by (cluster, job)
record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query:avg
- expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m])) by (le, cluster, job)
- expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_bucket[1m]))
by (le, cluster, job)
record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query_bucket:sum_rate
- expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_sum[1m])) by (cluster, job)
- expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_sum[1m]))
by (cluster, job)
record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query_sum:sum_rate
- expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_chunk_store_series_pre_intersection_per_query_count[1m]))
by (cluster, job)
record: cluster_job:cortex_chunk_store_series_pre_intersection_per_query_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_chunk_store_series_post_intersection_per_query:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_chunk_store_series_post_intersection_per_query:50quantile
- expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_series_post_intersection_per_query_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_sum[1m]))
by (cluster, job) / sum(rate(cortex_chunk_store_series_post_intersection_per_query_count[1m]))
by (cluster, job)
record: cluster_job:cortex_chunk_store_series_post_intersection_per_query:avg
- expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m])) by (le, cluster, job)
- expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_bucket[1m]))
by (le, cluster, job)
record: cluster_job:cortex_chunk_store_series_post_intersection_per_query_bucket:sum_rate
- expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_sum[1m])) by (cluster, job)
- expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_sum[1m]))
by (cluster, job)
record: cluster_job:cortex_chunk_store_series_post_intersection_per_query_sum:sum_rate
- expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_chunk_store_series_post_intersection_per_query_count[1m]))
by (cluster, job)
record: cluster_job:cortex_chunk_store_series_post_intersection_per_query_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_chunk_store_chunks_per_query:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_chunk_store_chunks_per_query:50quantile
- expr: sum(rate(cortex_chunk_store_chunks_per_query_sum[1m])) by (cluster, job) / sum(rate(cortex_chunk_store_chunks_per_query_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_chunk_store_chunks_per_query_sum[1m])) by (cluster, job)
/ sum(rate(cortex_chunk_store_chunks_per_query_count[1m])) by (cluster, job)
record: cluster_job:cortex_chunk_store_chunks_per_query:avg
- expr: sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster, job)
- expr: sum(rate(cortex_chunk_store_chunks_per_query_bucket[1m])) by (le, cluster,
job)
record: cluster_job:cortex_chunk_store_chunks_per_query_bucket:sum_rate
- expr: sum(rate(cortex_chunk_store_chunks_per_query_sum[1m])) by (cluster, job)
record: cluster_job:cortex_chunk_store_chunks_per_query_sum:sum_rate
- expr: sum(rate(cortex_chunk_store_chunks_per_query_count[1m])) by (cluster, job)
record: cluster_job:cortex_chunk_store_chunks_per_query_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
- expr: histogram_quantile(0.99, sum(rate(cortex_database_request_duration_seconds_bucket[1m]))
by (le, cluster, job, method))
record: cluster_job_method:cortex_database_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster, job, method))
- expr: histogram_quantile(0.50, sum(rate(cortex_database_request_duration_seconds_bucket[1m]))
by (le, cluster, job, method))
record: cluster_job_method:cortex_database_request_duration_seconds:50quantile
- expr: sum(rate(cortex_database_request_duration_seconds_sum[1m])) by (cluster, job, method) / sum(rate(cortex_database_request_duration_seconds_count[1m])) by (cluster, job, method)
- expr: sum(rate(cortex_database_request_duration_seconds_sum[1m])) by (cluster,
job, method) / sum(rate(cortex_database_request_duration_seconds_count[1m]))
by (cluster, job, method)
record: cluster_job_method:cortex_database_request_duration_seconds:avg
- expr: sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster, job, method)
- expr: sum(rate(cortex_database_request_duration_seconds_bucket[1m])) by (le, cluster,
job, method)
record: cluster_job_method:cortex_database_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_database_request_duration_seconds_sum[1m])) by (cluster, job, method)
- expr: sum(rate(cortex_database_request_duration_seconds_sum[1m])) by (cluster,
job, method)
record: cluster_job_method:cortex_database_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_database_request_duration_seconds_count[1m])) by (cluster, job, method)
- expr: sum(rate(cortex_database_request_duration_seconds_count[1m])) by (cluster,
job, method)
record: cluster_job_method:cortex_database_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
- expr: histogram_quantile(0.99, sum(rate(cortex_gcs_request_duration_seconds_bucket[1m]))
by (le, cluster, job, operation))
record: cluster_job_operation:cortex_gcs_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation))
- expr: histogram_quantile(0.50, sum(rate(cortex_gcs_request_duration_seconds_bucket[1m]))
by (le, cluster, job, operation))
record: cluster_job_operation:cortex_gcs_request_duration_seconds:50quantile
- expr: sum(rate(cortex_gcs_request_duration_seconds_sum[1m])) by (cluster, job, operation) / sum(rate(cortex_gcs_request_duration_seconds_count[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_gcs_request_duration_seconds_sum[1m])) by (cluster, job,
operation) / sum(rate(cortex_gcs_request_duration_seconds_count[1m])) by (cluster,
job, operation)
record: cluster_job_operation:cortex_gcs_request_duration_seconds:avg
- expr: sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster, job, operation)
- expr: sum(rate(cortex_gcs_request_duration_seconds_bucket[1m])) by (le, cluster,
job, operation)
record: cluster_job_operation:cortex_gcs_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_gcs_request_duration_seconds_sum[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_gcs_request_duration_seconds_sum[1m])) by (cluster, job,
operation)
record: cluster_job_operation:cortex_gcs_request_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_gcs_request_duration_seconds_count[1m])) by (cluster, job, operation)
- expr: sum(rate(cortex_gcs_request_duration_seconds_count[1m])) by (cluster, job,
operation)
record: cluster_job_operation:cortex_gcs_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_kv_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_kv_request_duration_seconds:50quantile
- expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job)
/ sum(rate(cortex_kv_request_duration_seconds_count[1m])) by (cluster, job)
record: cluster_job:cortex_kv_request_duration_seconds:avg
- expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster, job)
- expr: sum(rate(cortex_kv_request_duration_seconds_bucket[1m])) by (le, cluster,
job)
record: cluster_job:cortex_kv_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_kv_request_duration_seconds_sum[1m])) by (cluster, job)
record: cluster_job:cortex_kv_request_duration_seconds_sum:sum_rate
@@ -199,11 +296,14 @@ groups:
record: cluster_job:cortex_kv_request_duration_seconds_count:sum_rate
- name: cortex_queries
rules:
- expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_query_frontend_retries:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_query_frontend_retries:50quantile
- expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_query_frontend_retries_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_retries_count[1m]))
by (cluster, job)
record: cluster_job:cortex_query_frontend_retries:avg
- expr: sum(rate(cortex_query_frontend_retries_bucket[1m])) by (le, cluster, job)
record: cluster_job:cortex_query_frontend_retries_bucket:sum_rate
@@ -211,23 +311,33 @@ groups:
record: cluster_job:cortex_query_frontend_retries_sum:sum_rate
- expr: sum(rate(cortex_query_frontend_retries_count[1m])) by (cluster, job)
record: cluster_job:cortex_query_frontend_retries_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_query_frontend_queue_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_query_frontend_queue_duration_seconds:50quantile
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster,
job) / sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by
(cluster, job)
record: cluster_job:cortex_query_frontend_queue_duration_seconds:avg
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le, cluster, job)
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_bucket[1m])) by (le,
cluster, job)
record: cluster_job:cortex_query_frontend_queue_duration_seconds_bucket:sum_rate
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster, job)
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_sum[1m])) by (cluster,
job)
record: cluster_job:cortex_query_frontend_queue_duration_seconds_sum:sum_rate
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_query_frontend_queue_duration_seconds_count[1m])) by (cluster,
job)
record: cluster_job:cortex_query_frontend_queue_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_series_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_series:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_series_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_series:50quantile
- expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_ingester_queried_series_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_series_count[1m]))
by (cluster, job)
record: cluster_job:cortex_ingester_queried_series:avg
- expr: sum(rate(cortex_ingester_queried_series_bucket[1m])) by (le, cluster, job)
record: cluster_job:cortex_ingester_queried_series_bucket:sum_rate
@@ -235,11 +345,14 @@ groups:
record: cluster_job:cortex_ingester_queried_series_sum:sum_rate
- expr: sum(rate(cortex_ingester_queried_series_count[1m])) by (cluster, job)
record: cluster_job:cortex_ingester_queried_series_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_chunks_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_chunks_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_chunks:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_chunks_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_chunks_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_chunks:50quantile
- expr: sum(rate(cortex_ingester_queried_chunks_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_chunks_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_ingester_queried_chunks_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_chunks_count[1m]))
by (cluster, job)
record: cluster_job:cortex_ingester_queried_chunks:avg
- expr: sum(rate(cortex_ingester_queried_chunks_bucket[1m])) by (le, cluster, job)
record: cluster_job:cortex_ingester_queried_chunks_bucket:sum_rate
@@ -247,11 +360,14 @@ groups:
record: cluster_job:cortex_ingester_queried_chunks_sum:sum_rate
- expr: sum(rate(cortex_ingester_queried_chunks_count[1m])) by (cluster, job)
record: cluster_job:cortex_ingester_queried_chunks_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.99, sum(rate(cortex_ingester_queried_samples_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_samples:99quantile
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job))
- expr: histogram_quantile(0.50, sum(rate(cortex_ingester_queried_samples_bucket[1m]))
by (le, cluster, job))
record: cluster_job:cortex_ingester_queried_samples:50quantile
- expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_samples_count[1m])) by (cluster, job)
- expr: sum(rate(cortex_ingester_queried_samples_sum[1m])) by (cluster, job) / sum(rate(cortex_ingester_queried_samples_count[1m]))
by (cluster, job)
record: cluster_job:cortex_ingester_queried_samples:avg
- expr: sum(rate(cortex_ingester_queried_samples_bucket[1m])) by (le, cluster, job)
record: cluster_job:cortex_ingester_queried_samples_bucket:sum_rate


@@ -18,7 +18,8 @@ groups:
severity: critical
- alert: etcdInsufficientMembers
annotations:
message: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).'
message: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
}}).'
expr: |
sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) without (instance) + 1) / 2)
for: 3m
@@ -26,7 +27,8 @@ groups:
severity: critical
- alert: etcdNoLeader
annotations:
message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.'
message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has
no leader.'
expr: |
etcd_server_has_leader{job=~".*etcd.*"} == 0
for: 1m
@@ -34,7 +36,9 @@ groups:
severity: critical
- alert: etcdHighNumberOfLeaderChanges
annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within
the last 15 minutes. Frequent elections may be a sign of insufficient resources,
high network latency, or disruptions by other components and should be investigated.'
expr: |
increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4
for: 5m
@@ -42,7 +46,8 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{
$labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
expr: |
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) without (grpc_type, grpc_code)
/
@@ -53,7 +58,8 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{
$labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
expr: |
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) without (grpc_type, grpc_code)
/
@@ -64,7 +70,8 @@ groups:
severity: critical
- alert: etcdGRPCRequestsSlow
annotations:
message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method
}} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) without(grpc_type))
> 0.15
@@ -73,7 +80,8 @@ groups:
severity: critical
- alert: etcdMemberCommunicationSlow
annotations:
message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To
}} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.15
@@ -82,7 +90,8 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedProposals
annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within
the last 30 minutes on etcd instance {{ $labels.instance }}.'
expr: |
rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
for: 15m
@@ -90,7 +99,8 @@ groups:
severity: warning
- alert: etcdHighFsyncDurations
annotations:
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fync durations are
{{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.5
@@ -99,7 +109,8 @@ groups:
severity: warning
- alert: etcdHighCommitDurations
annotations:
message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
{{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.25
@ -108,7 +119,8 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedHTTPRequests
annotations:
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}'
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd
instance {{ $labels.instance }}'
expr: |
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
without (code) > 0.01
@ -117,7 +129,8 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedHTTPRequests
annotations:
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.'
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd
instance {{ $labels.instance }}.'
expr: |
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
without (code) > 0.05
@ -126,7 +139,8 @@ groups:
severity: critical
- alert: etcdHTTPRequestsSlow
annotations:
message: etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow.
message: etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method
}} are slow.
expr: |
histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
> 0.15

View file

@ -49,7 +49,8 @@ groups:
severity: critical
- alert: GlusterBrickUtilization
annotations:
message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more than 80%
message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more
than 80%
expr: |
100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"}
/ gluster_brick_capacity_bytes_total{job="glusterd2-client"} > 80
@ -58,7 +59,8 @@ groups:
severity: warning
- alert: GlusterBrickUtilization
annotations:
message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more than 90%
message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more
than 90%
expr: |
100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"}
/ gluster_brick_capacity_bytes_total{job="glusterd2-client"} > 90
@ -69,7 +71,8 @@ groups:
rules:
- alert: GlusterThinpoolDataUtilization
annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than 80%
message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more
than 80%
expr: |
gluster_thinpool_data_used_bytes{job="glusterd2-client"} / gluster_thinpool_data_total_bytes{job="glusterd2-client"} > 0.8
for: 5m
@ -77,7 +80,8 @@ groups:
severity: warning
- alert: GlusterThinpoolDataUtilization
annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than 90%
message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more
than 90%
expr: |
gluster_thinpool_data_used_bytes{job="glusterd2-client"} / gluster_thinpool_data_total_bytes{job="glusterd2-client"} > 0.9
for: 5m
@ -85,7 +89,8 @@ groups:
severity: critical
- alert: GlusterThinpoolMetadataUtilization
annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more than 80%
message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more
than 80%
expr: |
gluster_thinpool_metadata_used_bytes{job="glusterd2-client"} / gluster_thinpool_metadata_total_bytes{job="glusterd2-client"} > 0.8
for: 5m
@ -93,7 +98,8 @@ groups:
severity: warning
- alert: GlusterThinpoolMetadataUtilization
annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more than 90%
message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more
than 90%
expr: |
gluster_thinpool_metadata_used_bytes{job="glusterd2-client"} / gluster_thinpool_metadata_total_bytes{job="glusterd2-client"} > 0.9
for: 5m

View file

@ -13,7 +13,9 @@ groups:
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is experiencing {{ printf "%.2f" $value }}% HTTP errors.
expr: 100 * sum(rate(jaeger_agent_http_server_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_agent_http_server_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_agent_http_server_errors_total[1m])) by (instance,
job, namespace) / sum(rate(jaeger_agent_http_server_total[1m])) by (instance,
job, namespace)> 1
for: 15m
labels:
severity: warning
@ -21,7 +23,9 @@ groups:
annotations:
message: |
service {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
expr: 100 * sum(rate(jaeger_reporter_spans{result=~"dropped|err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_reporter_spans[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_reporter_spans{result=~"dropped|err"}[1m])) by (instance,
job, namespace) / sum(rate(jaeger_reporter_spans[1m])) by (instance, job, namespace)>
1
for: 15m
labels:
severity: warning
@ -29,7 +33,9 @@ groups:
annotations:
message: |
agent {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
expr: 100 * sum(rate(jaeger_agent_reporter_batches_failures_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_agent_reporter_batches_submitted_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_agent_reporter_batches_failures_total[1m])) by (instance,
job, namespace) / sum(rate(jaeger_agent_reporter_batches_submitted_total[1m]))
by (instance, job, namespace)> 1
for: 15m
labels:
severity: warning
@ -45,7 +51,9 @@ groups:
annotations:
message: |
collector {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
expr: 100 * sum(rate(jaeger_collector_spans_dropped_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_collector_spans_received_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_collector_spans_dropped_total[1m])) by (instance,
job, namespace) / sum(rate(jaeger_collector_spans_received_total[1m])) by (instance,
job, namespace)> 1
for: 15m
labels:
severity: warning
@ -53,7 +61,9 @@ groups:
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is failing {{ printf "%.2f" $value }}% in updating sampling policies.
expr: 100 * sum(rate(jaeger_sampler_queries{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_sampler_queries[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_sampler_queries{result="err"}[1m])) by (instance,
job, namespace) / sum(rate(jaeger_sampler_queries[1m])) by (instance, job, namespace)>
1
for: 15m
labels:
severity: warning
@ -61,7 +71,8 @@ groups:
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is slow at persisting spans.
expr: histogram_quantile(0.99, sum by (le) (rate(jaeger_collector_save_latency_bucket[1m]))) > 0.5
expr: histogram_quantile(0.99, sum by (le) (rate(jaeger_collector_save_latency_bucket[1m])))
> 0.5
for: 15m
labels:
severity: warning
@ -69,7 +80,9 @@ groups:
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is failing {{ printf "%.2f" $value }}% in updating throttling policies.
expr: 100 * sum(rate(jaeger_throttler_updates{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_throttler_updates[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_throttler_updates{result="err"}[1m])) by (instance,
job, namespace) / sum(rate(jaeger_throttler_updates[1m])) by (instance, job,
namespace)> 1
for: 15m
labels:
severity: warning
@ -77,7 +90,9 @@ groups:
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
expr: 100 * sum(rate(jaeger_query_requests_total{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_query_requests_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_query_requests_total{result="err"}[1m])) by (instance,
job, namespace) / sum(rate(jaeger_query_requests_total[1m])) by (instance, job,
namespace)> 1
for: 15m
labels:
severity: warning
@ -85,7 +100,9 @@ groups:
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
expr: 100 * sum(rate(jaeger_cassandra_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_cassandra_attempts_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_cassandra_errors_total[1m])) by (instance, job, namespace)
/ sum(rate(jaeger_cassandra_attempts_total[1m])) by (instance, job, namespace)>
1
for: 15m
labels:
severity: warning
@ -93,7 +110,9 @@ groups:
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
expr: 100 * sum(rate(jaeger_cassandra_read_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_cassandra_read_attempts_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_cassandra_read_errors_total[1m])) by (instance, job,
namespace) / sum(rate(jaeger_cassandra_read_attempts_total[1m])) by (instance,
job, namespace)> 1
for: 15m
labels:
severity: warning

View file

@ -3,7 +3,8 @@ groups:
rules:
- alert: CockroachInstanceFlapping
annotations:
message: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted {{ $value }} time(s) in 10m'
message: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted
{{ $value }} time(s) in 10m'
expr: |
resets(cockroachdb_sys_uptime{job="cockroachdb-public"}[10m]) > 5
for: 1m
@ -29,7 +30,8 @@ groups:
severity: warning
- alert: CockroachStoreDiskLow
annotations:
message: Store {{ $labels.store }} on node {{ $labels.instance }} at {{ $value }} available disk fraction
message: Store {{ $labels.store }} on node {{ $labels.instance }} at {{ $value
}} available disk fraction
expr: |
:cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.15
for: 30m
@ -61,7 +63,8 @@ groups:
severity: warning
- alert: CockroachHighOpenFDCount
annotations:
message: 'Too many open file descriptors on {{ $labels.instance }}: {{ $value }} fraction used'
message: 'Too many open file descriptors on {{ $labels.instance }}: {{ $value
}} fraction used'
expr: |
cockroachdb_sys_fd_open{job="cockroachdb-public"} / cockroachdb_sys_fd_softlimit{job="cockroachdb-public"} > 0.8
for: 10m

View file

@ -3,7 +3,10 @@ groups:
rules:
- alert: KubeStateMetricsListErrors
annotations:
message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
description: kube-state-metrics is experiencing errors at an elevated rate in
list operations. This is likely causing it to not be able to expose metrics
about Kubernetes objects correctly or at all.
summary: kube-state-metrics is experiencing errors in list operations.
expr: |
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
/
@ -14,7 +17,10 @@ groups:
severity: critical
- alert: KubeStateMetricsWatchErrors
annotations:
message: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
description: kube-state-metrics is experiencing errors at an elevated rate in
watch operations. This is likely causing it to not be able to expose metrics
about Kubernetes objects correctly or at all.
summary: kube-state-metrics is experiencing errors in watch operations.
expr: |
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
/

View file

@ -3,7 +3,8 @@ groups:
rules:
- alert: KubePodCrashLooping
annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
expr: |
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
@ -12,7 +13,8 @@ groups:
severity: warning
- alert: KubePodNotReady
annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
state for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
expr: |
sum by (namespace, pod) (
@ -27,7 +29,9 @@ groups:
severity: warning
- alert: KubeDeploymentGenerationMismatch
annotations:
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match; this indicates that the Deployment has failed but has not been rolled back.
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
}} does not match; this indicates that the Deployment has failed but has not
been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
expr: |
kube_deployment_status_observed_generation{job="kube-state-metrics"}
@ -38,7 +42,8 @@ groups:
severity: warning
- alert: KubeDeploymentReplicasMismatch
annotations:
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not
matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
expr: |
(
@ -55,7 +60,8 @@ groups:
severity: warning
- alert: KubeStatefulSetReplicasMismatch
annotations:
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not
matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
expr: |
(
@ -72,7 +78,9 @@ groups:
severity: warning
- alert: KubeStatefulSetGenerationMismatch
annotations:
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match; this indicates that the StatefulSet has failed but has not been rolled back.
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
}} does not match; this indicates that the StatefulSet has failed but has
not been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
expr: |
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
@ -83,7 +91,8 @@ groups:
severity: warning
- alert: KubeStatefulSetUpdateNotRolledOut
annotations:
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
has not been rolled out.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
expr: |
(
@ -108,7 +117,8 @@ groups:
severity: warning
- alert: KubeDaemonSetRolloutStuck
annotations:
message: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes.
message: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished
or progressed for at least 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
expr: |
(
@ -139,7 +149,8 @@ groups:
severity: warning
- alert: KubeContainerWaiting
annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
has been in waiting state for longer than 1 hour.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
expr: |
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
@ -148,7 +159,8 @@ groups:
severity: warning
- alert: KubeDaemonSetNotScheduled
annotations:
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.'
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are not scheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
expr: |
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
@ -159,7 +171,8 @@ groups:
severity: warning
- alert: KubeDaemonSetMisScheduled
annotations:
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.'
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are running where they are not supposed to run.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
expr: |
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
@ -168,7 +181,8 @@ groups:
severity: warning
- alert: KubeJobCompletion
annotations:
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than 12 hours to complete.
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than
12 hours to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
expr: |
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
@ -186,7 +200,8 @@ groups:
severity: warning
- alert: KubeHpaReplicasMismatch
annotations:
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes.
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired
number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
expr: |
(kube_hpa_status_desired_replicas{job="kube-state-metrics"}
@ -199,7 +214,8 @@ groups:
severity: warning
- alert: KubeHpaMaxedOut
annotations:
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes.
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max
replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
expr: |
kube_hpa_status_current_replicas{job="kube-state-metrics"}
@ -212,7 +228,8 @@ groups:
rules:
- alert: KubeCPUOvercommit
annotations:
message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
message: Cluster has overcommitted CPU resource requests for Pods and cannot
tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
expr: |
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
@ -225,7 +242,8 @@ groups:
severity: warning
- alert: KubeMemoryOvercommit
annotations:
message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
message: Cluster has overcommitted memory resource requests for Pods and cannot
tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
expr: |
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
@ -264,7 +282,8 @@ groups:
severity: warning
- alert: KubeQuotaFullyUsed
annotations:
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused
expr: |
kube_resourcequota{job="kube-state-metrics", type="used"}
@ -276,7 +295,9 @@ groups:
severity: info
- alert: CPUThrottlingHigh
annotations:
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{
$labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod
}}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
expr: |
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
@ -290,7 +311,9 @@ groups:
rules:
- alert: KubePersistentVolumeFillingUp
annotations:
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }}
in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
}} free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
expr: |
kubelet_volume_stats_available_bytes{job="kubelet"}
@ -302,7 +325,9 @@ groups:
severity: critical
- alert: KubePersistentVolumeFillingUp
annotations:
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is expected to fill up within four
days. Currently {{ $value | humanizePercentage }} is available.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
expr: |
(
@ -317,7 +342,8 @@ groups:
severity: warning
- alert: KubePersistentVolumeErrors
annotations:
message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.
message: The persistent volume {{ $labels.persistentvolume }} has status {{
$labels.phase }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
expr: |
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
@ -328,7 +354,8 @@ groups:
rules:
- alert: KubeVersionMismatch
annotations:
message: There are {{ $value }} different semantic versions of Kubernetes components running.
message: There are {{ $value }} different semantic versions of Kubernetes components
running.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
expr: |
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*).*"))) > 1
@ -337,7 +364,8 @@ groups:
severity: warning
- alert: KubeClientErrors
annotations:
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
}}' is experiencing {{ $value | humanizePercentage }} errors.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
expr: |
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
@ -405,7 +433,8 @@ groups:
rules:
- alert: KubeClientCertificateExpiration
annotations:
message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
message: A client certificate used to authenticate to the apiserver is expiring
in less than 7.0 days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800
@ -413,7 +442,8 @@ groups:
severity: warning
- alert: KubeClientCertificateExpiration
annotations:
message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
message: A client certificate used to authenticate to the apiserver is expiring
in less than 24.0 hours.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400
@ -421,7 +451,9 @@ groups:
severity: critical
- alert: AggregatedAPIErrors
annotations:
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors has increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported
errors. The number of errors has increased for it in the past five minutes.
High values indicate that the availability of the service changes too often.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
expr: |
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
@ -429,7 +461,8 @@ groups:
severity: warning
- alert: AggregatedAPIDown
annotations:
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 5m.
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been
only {{ $value | humanize }}% available over the last 5m.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
expr: |
(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[5m]))) * 100 < 90
@ -466,7 +499,8 @@ groups:
severity: warning
- alert: KubeletTooManyPods
annotations:
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
}} of its Pod capacity.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
expr: |
count by(node) (
@ -481,7 +515,8 @@ groups:
severity: warning
- alert: KubeNodeReadinessFlapping
annotations:
message: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.
message: The readiness status of node {{ $labels.node }} has changed {{ $value
}} times in the last 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
expr: |
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
@ -490,7 +525,8 @@ groups:
severity: warning
- alert: KubeletPlegDurationHigh
annotations:
message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.
message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration
of {{ $value }} seconds on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
expr: |
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
@ -499,7 +535,8 @@ groups:
severity: warning
- alert: KubeletPodStartUpLatencyHigh
annotations:
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
expr: |
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet"} > 60

View file

@ -3,7 +3,8 @@ groups:
rules:
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available space left and is filling up.
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |
(
@ -18,7 +19,8 @@ groups:
severity: warning
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available space left and is filling up fast.
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |
(
@ -33,7 +35,8 @@ groups:
severity: critical
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available space left.
summary: Filesystem has less than 5% space left.
expr: |
(
@ -46,7 +49,8 @@ groups:
severity: warning
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available space left.
summary: Filesystem has less than 3% space left.
expr: |
(
@ -59,7 +63,8 @@ groups:
severity: critical
- alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available inodes left and is filling up.
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: |
(
@ -74,7 +79,8 @@ groups:
severity: warning
- alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: |
(
@ -89,7 +95,8 @@ groups:
severity: critical
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available inodes left.
summary: Filesystem has less than 5% inodes left.
expr: |
(
@ -102,7 +109,8 @@ groups:
severity: warning
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has
only {{ printf "%.2f" $value }}% available inodes left.
summary: Filesystem has less than 3% inodes left.
expr: |
(
@ -115,7 +123,8 @@ groups:
severity: critical
- alert: NodeNetworkReceiveErrs
annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
summary: Network interface is reporting many receive errors.
expr: |
increase(node_network_receive_errs_total[2m]) > 10
@ -124,7 +133,8 @@ groups:
severity: warning
- alert: NodeNetworkTransmitErrs
annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
summary: Network interface is reporting many transmit errors.
expr: |
increase(node_network_transmit_errs_total[2m]) > 10
@ -149,7 +159,8 @@ groups:
severity: warning
- alert: NodeClockSkewDetected
annotations:
message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure
NTP is configured correctly on this host.
summary: Clock skew detected.
expr: |
(
@ -168,7 +179,8 @@ groups:
severity: warning
- alert: NodeClockNotSynchronising
annotations:
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is
configured on this host.
summary: Clock not synchronising.
expr: |
min_over_time(node_timex_sync_status[5m]) == 0

View file

@ -14,8 +14,10 @@ groups:
severity: critical
- alert: PrometheusNotificationQueueRunningFull
annotations:
description: Alert notification queue of Prometheus {{$labels.instance}} is running full.
summary: Prometheus alert notification queue predicted to run full in less than 30m.
description: Alert notification queue of Prometheus {{$labels.instance}} is
running full.
summary: Prometheus alert notification queue predicted to run full in less than
30m.
expr: |
# Without min_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@ -29,8 +31,10 @@ groups:
severity: warning
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations:
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.'
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus
{{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.'
summary: Prometheus has encountered more than 1% errors sending alerts to a
specific Alertmanager.
expr: |
(
rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
@ -44,7 +48,8 @@ groups:
severity: warning
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.instance}} to any Alertmanager.'
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
from Prometheus {{$labels.instance}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without(alertmanager) (
@ -70,7 +75,8 @@ groups:
severity: warning
- alert: PrometheusTSDBReloadsFailing
annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}} reload failures over the last 3h.
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
reload failures over the last 3h.
summary: Prometheus has issues reloading blocks from disk.
expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus"}[3h]) > 0
@ -79,7 +85,8 @@ groups:
severity: warning
- alert: PrometheusTSDBCompactionsFailing
annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}} compaction failures over the last 3h.
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
compaction failures over the last 3h.
summary: Prometheus has issues compacting blocks.
expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus"}[3h]) > 0
@ -97,7 +104,8 @@ groups:
severity: warning
- alert: PrometheusDuplicateTimestamps
annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }}
samples/s with different values but duplicated timestamp.
summary: Prometheus is dropping samples with duplicate timestamps.
expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus"}[5m]) > 0
@ -106,7 +114,8 @@ groups:
severity: warning
- alert: PrometheusOutOfOrderTimestamps
annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }}
samples/s with timestamps arriving out of order.
summary: Prometheus drops samples with out-of-order timestamps.
expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus"}[5m]) > 0
@ -115,7 +124,8 @@ groups:
severity: warning
- alert: PrometheusRemoteStorageFailures
annotations:
description: Prometheus {{$labels.instance}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
description: Prometheus {{$labels.instance}} failed to send {{ printf "%.1f"
$value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
summary: Prometheus fails to send samples to remote storage.
expr: |
(
@ -134,7 +144,8 @@ groups:
severity: critical
- alert: PrometheusRemoteWriteBehind
annotations:
description: Prometheus {{$labels.instance}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
description: Prometheus {{$labels.instance}} remote write is {{ printf "%.1f"
$value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
summary: Prometheus remote write is behind.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
@ -150,8 +161,12 @@ groups:
severity: critical
- alert: PrometheusRemoteWriteDesiredShards
annotations:
description: Prometheus {{$labels.instance}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus"}` $labels.instance | query | first | value }}.
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
description: Prometheus {{$labels.instance}} remote write desired shards calculation
wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url
}}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus"}`
$labels.instance | query | first | value }}.
summary: Prometheus remote write desired shards calculation wants to run more
than configured max shards.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@ -165,7 +180,8 @@ groups:
severity: warning
- alert: PrometheusRuleFailures
annotations:
description: Prometheus {{$labels.instance}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
description: Prometheus {{$labels.instance}} has failed to evaluate {{ printf
"%.0f" $value }} rules in the last 5m.
summary: Prometheus is failing rule evaluations.
expr: |
increase(prometheus_rule_evaluation_failures_total{job="prometheus"}[5m]) > 0
@ -174,7 +190,8 @@ groups:
severity: critical
- alert: PrometheusMissingRuleEvaluations
annotations:
description: Prometheus {{$labels.instance}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
description: Prometheus {{$labels.instance}} has missed {{ printf "%.0f" $value
}} rule group evaluations in the last 5m.
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: |
increase(prometheus_rule_group_iterations_missed_total{job="prometheus"}[5m]) > 0
@ -183,8 +200,10 @@ groups:
severity: warning
- alert: PrometheusTargetLimitHit
annotations:
description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit.
summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value
}} targets because the number of targets exceeded the configured target_limit.
summary: Prometheus has dropped targets because some scrape configs have exceeded
the targets limit.
expr: |
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus"}[5m]) > 0
for: 15m

View file

@ -3,7 +3,8 @@ groups:
rules:
- alert: ThanosCompactMultipleRunning
annotations:
message: No more than one Thanos Compact instance should be running at once. There are {{ $value }} running.
message: No more than one Thanos Compact instance should be running at once.
There are {{ $value }} running.
expr: sum(up{job=~"thanos-compact.*"}) > 1
for: 5m
labels:
@ -17,7 +18,8 @@ groups:
severity: warning
- alert: ThanosCompactHighCompactionFailures
annotations:
message: Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize }}% of compactions.
message: Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize
}}% of compactions.
expr: |
(
sum by (job) (rate(thanos_compact_group_compactions_failures_total{job=~"thanos-compact.*"}[5m]))
@ -30,7 +32,8 @@ groups:
severity: warning
- alert: ThanosCompactBucketHighOperationFailures
annotations:
message: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations.
message: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value
| humanize }}% of operations.
expr: |
(
sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-compact.*"}[5m]))
@ -44,14 +47,16 @@ groups:
- alert: ThanosCompactHasNotRun
annotations:
message: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours.
expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) / 60 / 60 > 24
expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h])))
/ 60 / 60 > 24
labels:
severity: warning
- name: thanos-query.rules
rules:
- alert: ThanosQueryHttpRequestQueryErrorRateHigh
annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query" requests.
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize
}}% of "query" requests.
expr: |
(
sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m]))
@ -63,7 +68,8 @@ groups:
severity: critical
- alert: ThanosQueryHttpRequestQueryRangeErrorRateHigh
annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query_range" requests.
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize
}}% of "query_range" requests.
expr: |
(
sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m]))
@ -75,7 +81,8 @@ groups:
severity: critical
- alert: ThanosQueryGrpcServerErrorRate
annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: |
(
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*"}[5m]))
@ -88,7 +95,8 @@ groups:
severity: warning
- alert: ThanosQueryGrpcClientErrorRate
annotations:
message: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize }}% of requests.
message: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize
}}% of requests.
expr: |
(
sum by (job) (rate(grpc_client_handled_total{grpc_code!="OK", job=~"thanos-query.*"}[5m]))
@ -100,7 +108,8 @@ groups:
severity: warning
- alert: ThanosQueryHighDNSFailures
annotations:
message: Thanos Query {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for store endpoints.
message: Thanos Query {{$labels.job}} has {{ $value | humanize }}% of failing
DNS queries for store endpoints.
expr: |
(
sum by (job) (rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
@ -112,7 +121,8 @@ groups:
severity: warning
- alert: ThanosQueryInstantLatencyHigh
annotations:
message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for instant queries.
message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for instant queries.
expr: |
(
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m]))) > 40
@ -124,7 +134,8 @@ groups:
severity: critical
- alert: ThanosQueryRangeLatencyHigh
annotations:
message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for range queries.
message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for range queries.
expr: |
(
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query_range"}[5m]))) > 90
@ -138,7 +149,8 @@ groups:
rules:
- alert: ThanosReceiveHttpRequestErrorRateHigh
annotations:
message: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.
message: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: |
(
sum(rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m]))
@ -150,7 +162,8 @@ groups:
severity: critical
- alert: ThanosReceiveHttpRequestLatencyHigh
annotations:
message: Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for requests.
message: Thanos Receive {{$labels.job}} has a 99th percentile latency of {{
$value }} seconds for requests.
expr: |
(
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-receive.*", handler="receive"}[5m]))) > 10
@ -162,7 +175,8 @@ groups:
severity: critical
- alert: ThanosReceiveHighReplicationFailures
annotations:
message: Thanos Receive {{$labels.job}} is failing to replicate {{ $value | humanize }}% of requests.
message: Thanos Receive {{$labels.job}} is failing to replicate {{ $value |
humanize }}% of requests.
expr: |
thanos_receive_replication_factor > 1
and
@ -184,7 +198,8 @@ groups:
severity: warning
- alert: ThanosReceiveHighForwardRequestFailures
annotations:
message: Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize }}% of requests.
message: Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize
}}% of requests.
expr: |
(
sum by (job) (rate(thanos_receive_forward_requests_total{result="error", job=~"thanos-receive.*"}[5m]))
@ -196,7 +211,8 @@ groups:
severity: warning
- alert: ThanosReceiveHighHashringFileRefreshFailures
annotations:
message: Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{ $value | humanize }} of attempts failed.
message: Thanos Receive {{$labels.job}} is failing to refresh hashring file,
{{ $value | humanize }} of attempts failed.
expr: |
(
sum by (job) (rate(thanos_receive_hashrings_file_errors_total{job=~"thanos-receive.*"}[5m]))
@ -209,14 +225,17 @@ groups:
severity: warning
- alert: ThanosReceiveConfigReloadFailure
annotations:
message: Thanos Receive {{$labels.job}} has not been able to reload hashring configurations.
expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) by (job) != 1
message: Thanos Receive {{$labels.job}} has not been able to reload hashring
configurations.
expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"})
by (job) != 1
for: 5m
labels:
severity: warning
- alert: ThanosReceiveNoUpload
annotations:
message: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded latest data to object storage.
message: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded
latest data to object storage.
expr: |
(up{job=~"thanos-receive.*"} - 1)
+ on (instance) # filters to only alert on current instance last 3h
@ -236,7 +255,8 @@ groups:
severity: critical
- alert: ThanosSidecarUnhealthy
annotations:
message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{ $value }} seconds.
message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{
$value }} seconds.
expr: |
time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"}) by (job, pod) >= 600
labels:
@ -245,7 +265,8 @@ groups:
rules:
- alert: ThanosStoreGrpcErrorRate
annotations:
message: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.
message: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: |
(
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*"}[5m]))
@ -258,7 +279,8 @@ groups:
severity: warning
- alert: ThanosStoreSeriesGateLatencyHigh
annotations:
message: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for store series gate requests.
message: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value
}} seconds for store series gate requests.
expr: |
(
histogram_quantile(0.9, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2
@ -270,7 +292,8 @@ groups:
severity: warning
- alert: ThanosStoreBucketHighOperationFailures
annotations:
message: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations.
message: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value
| humanize }}% of operations.
expr: |
(
sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-store.*"}[5m]))
@ -283,7 +306,8 @@ groups:
severity: warning
- alert: ThanosStoreObjstoreOperationLatencyHigh
annotations:
message: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of {{ $value }} seconds for the bucket operations.
message: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of
{{ $value }} seconds for the bucket operations.
expr: |
(
histogram_quantile(0.9, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2
@ -305,7 +329,8 @@ groups:
severity: critical
- alert: ThanosRuleSenderIsFailingAlerts
annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts to alertmanager.
message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts
to alertmanager.
expr: |
sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0
for: 5m
@ -313,7 +338,8 @@ groups:
severity: critical
- alert: ThanosRuleHighRuleEvaluationFailures
annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate rules.
message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate
rules.
expr: |
(
sum by (job) (rate(prometheus_rule_evaluation_failures_total{job=~"thanos-rule.*"}[5m]))
@ -326,7 +352,8 @@ groups:
severity: critical
- alert: ThanosRuleHighRuleEvaluationWarnings
annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} has a high number of evaluation warnings.
message: Thanos Rule {{$labels.job}} {{$labels.pod}} has a high number of evaluation
warnings.
expr: |
sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0
for: 15m
@ -334,7 +361,8 @@ groups:
severity: info
- alert: ThanosRuleRuleEvaluationLatencyHigh
annotations:
message: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency than interval for {{$labels.rule_group}}.
message: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency
than interval for {{$labels.rule_group}}.
expr: |
(
sum by (job, pod, rule_group) (prometheus_rule_group_last_duration_seconds{job=~"thanos-rule.*"})
@ -346,7 +374,8 @@ groups:
severity: warning
- alert: ThanosRuleGrpcErrorRate
annotations:
message: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.
message: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize
}}% of requests.
expr: |
(
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m]))
@ -360,13 +389,15 @@ groups:
- alert: ThanosRuleConfigReloadFailure
annotations:
message: Thanos Rule {{$labels.job}} has not been able to reload its configuration.
expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by (job) != 1
expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by
(job) != 1
for: 5m
labels:
severity: info
- alert: ThanosRuleQueryHighDNSFailures
annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for query endpoints.
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing
DNS queries for query endpoints.
expr: |
(
sum by (job) (rate(thanos_ruler_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m]))
@ -379,7 +410,8 @@ groups:
severity: warning
- alert: ThanosRuleAlertmanagerHighDNSFailures
annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for Alertmanager endpoints.
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing
DNS queries for Alertmanager endpoints.
expr: |
(
sum by (job) (rate(thanos_ruler_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m]))
@ -392,7 +424,8 @@ groups:
severity: warning
- alert: ThanosRuleNoEvaluationFor10Intervals
annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups that did not evaluate for at least 10x of their expected interval.
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups
that did not evaluate for at least 10x of their expected interval.
expr: |
time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"})
>
@ -402,7 +435,8 @@ groups:
severity: info
- alert: ThanosNoRuleEvaluations
annotations:
message: Thanos Rule {{$labels.job}} did not perform any rule evaluations in the past 2 minutes.
message: Thanos Rule {{$labels.job}} did not perform any rule evaluations in
the past 2 minutes.
expr: |
sum(rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[2m])) <= 0
and
@ -472,7 +506,8 @@ groups:
severity: critical
- alert: ThanosBucketReplicateErrorRate
annotations:
message: Thanos Replicate is failing to run; {{ $value | humanize }}% of attempts failed.
message: Thanos Replicate is failing to run; {{ $value | humanize }}% of attempts
failed.
expr: |
(
sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m]))
@ -484,7 +519,8 @@ groups:
severity: critical
- alert: ThanosBucketReplicateRunLatency
annotations:
message: Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for the replicate operations.
message: Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{
$value }} seconds for the replicate operations.
expr: |
(
histogram_quantile(0.9, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20
View file
@ -59,7 +59,8 @@ labels:
{{< code lang="yaml" >}}
alert: CephMdsMissingReplicas
annotations:
description: Minimum required replicas for storage metadata service not available. Might affect the working of storage cluster.
description: Minimum required replicas for storage metadata service not available.
Might affect the working of storage cluster.
message: Insufficient replicas for storage metadata service.
severity_level: warning
storage_type: ceph
@ -93,7 +94,8 @@ labels:
{{< code lang="yaml" >}}
alert: CephMonHighNumberOfLeaderChanges
annotations:
description: Ceph Monitor {{ $labels.ceph_daemon }} on host {{ $labels.hostname }} has seen {{ $value | printf "%.2f" }} leader changes per minute recently.
description: Ceph Monitor {{ $labels.ceph_daemon }} on host {{ $labels.hostname
}} has seen {{ $value | printf "%.2f" }} leader changes per minute recently.
message: Storage Cluster has seen many leader changes recently.
severity_level: warning
storage_type: ceph
@ -129,7 +131,9 @@ labels:
{{< code lang="yaml" >}}
alert: CephOSDCriticallyFull
annotations:
description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has crossed 85% on host {{ $labels.hostname }}. Immediately free up some space or expand the storage cluster or contact support.
description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has
crossed 85% on host {{ $labels.hostname }}. Immediately free up some space or
expand the storage cluster or contact support.
message: Back-end storage device is critically full.
severity_level: error
storage_type: ceph
@ -145,7 +149,9 @@ labels:
{{< code lang="yaml" >}}
alert: CephOSDNearFull
annotations:
description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has crossed 75% on host {{ $labels.hostname }}. Free up some space or expand the storage cluster or contact support.
description: Utilization of back-end storage device {{ $labels.ceph_daemon }} has
crossed 75% on host {{ $labels.hostname }}. Free up some space or expand the storage
cluster or contact support.
message: Back-end storage device is nearing full.
severity_level: warning
storage_type: ceph
@ -161,7 +167,8 @@ labels:
{{< code lang="yaml" >}}
alert: CephOSDDiskNotResponding
annotations:
description: Disk device {{ $labels.device }} not responding, on host {{ $labels.host }}.
description: Disk device {{ $labels.device }} not responding, on host {{ $labels.host
}}.
message: Disk not responding
severity_level: error
storage_type: ceph
@ -177,7 +184,8 @@ labels:
{{< code lang="yaml" >}}
alert: CephOSDDiskUnavailable
annotations:
description: Disk device {{ $labels.device }} not accessible on host {{ $labels.host }}.
description: Disk device {{ $labels.device }} not accessible on host {{ $labels.host
}}.
message: Disk not accessible
severity_level: error
storage_type: ceph
@ -227,8 +235,10 @@ labels:
{{< code lang="yaml" >}}
alert: PersistentVolumeUsageNearFull
annotations:
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 75%. Free up some space or expand the PVC.
message: PVC {{ $labels.persistentvolumeclaim }} is nearing full. Data deletion or PVC expansion is required.
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 75%.
Free up some space or expand the PVC.
message: PVC {{ $labels.persistentvolumeclaim }} is nearing full. Data deletion
or PVC expansion is required.
severity_level: warning
storage_type: ceph
expr: |
@ -243,8 +253,10 @@ labels:
{{< code lang="yaml" >}}
alert: PersistentVolumeUsageCritical
annotations:
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 85%. Free up some space or expand the PVC immediately.
message: PVC {{ $labels.persistentvolumeclaim }} is critically full. Data deletion or PVC expansion is required.
description: PVC {{ $labels.persistentvolumeclaim }} utilization has crossed 85%.
Free up some space or expand the PVC immediately.
message: PVC {{ $labels.persistentvolumeclaim }} is critically full. Data deletion
or PVC expansion is required.
severity_level: error
storage_type: ceph
expr: |
@ -327,8 +339,10 @@ labels:
{{< code lang="yaml" >}}
alert: CephClusterNearFull
annotations:
description: Storage cluster utilization has crossed 75% and will become read-only at 85%. Free up some space or expand the storage cluster.
message: Storage cluster is nearing full. Data deletion or cluster expansion is required.
description: Storage cluster utilization has crossed 75% and will become read-only
at 85%. Free up some space or expand the storage cluster.
message: Storage cluster is nearing full. Data deletion or cluster expansion is
required.
severity_level: warning
storage_type: ceph
expr: |
@ -343,8 +357,10 @@ labels:
{{< code lang="yaml" >}}
alert: CephClusterCriticallyFull
annotations:
description: Storage cluster utilization has crossed 80% and will become read-only at 85%. Free up some space or expand the storage cluster immediately.
message: Storage cluster is critically full and needs immediate data deletion or cluster expansion.
description: Storage cluster utilization has crossed 80% and will become read-only
at 85%. Free up some space or expand the storage cluster immediately.
message: Storage cluster is critically full and needs immediate data deletion or
cluster expansion.
severity_level: error
storage_type: ceph
expr: |
@ -359,8 +375,10 @@ labels:
{{< code lang="yaml" >}}
alert: CephClusterReadOnly
annotations:
description: Storage cluster utilization has crossed 85% and will become read-only now. Free up some space or expand the storage cluster immediately.
message: Storage cluster is read-only now and needs immediate data deletion or cluster expansion.
description: Storage cluster utilization has crossed 85% and will become read-only
now. Free up some space or expand the storage cluster immediately.
message: Storage cluster is read-only now and needs immediate data deletion or cluster
expansion.
severity_level: error
storage_type: ceph
expr: |
View file
@ -39,7 +39,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}}
alert: CoreDNSLatencyHigh
annotations:
message: CoreDNS has 99th percentile latency of {{ $value }} seconds for server {{ $labels.server }} zone {{ $labels.zone }} .
message: CoreDNS has 99th percentile latency of {{ $value }} seconds for server
{{ $labels.server }} zone {{ $labels.zone }} .
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednslatencyhigh
expr: |
histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job="kube-dns"}[5m])) by(server, zone, le)) > 4
@ -54,7 +55,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}}
alert: CoreDNSErrorsHigh
annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests.
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of
requests.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
expr: |
sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@ -71,7 +73,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}}
alert: CoreDNSErrorsHigh
annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests.
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of
requests.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh
expr: |
sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@ -90,7 +93,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}}
alert: CoreDNSForwardLatencyHigh
annotations:
message: CoreDNS has 99th percentile latency of {{ $value }} seconds forwarding requests to {{ $labels.to }}.
message: CoreDNS has 99th percentile latency of {{ $value }} seconds forwarding
requests to {{ $labels.to }}.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardlatencyhigh
expr: |
histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{job="kube-dns"}[5m])) by(to, le)) > 4
@ -105,7 +109,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}}
alert: CoreDNSForwardErrorsHigh
annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of forward requests to {{ $labels.to }}.
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of
forward requests to {{ $labels.to }}.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
expr: |
sum(rate(coredns_forward_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))
@ -122,7 +127,8 @@ https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-core
{{< code lang="yaml" >}}
alert: CoreDNSForwardErrorsHigh
annotations:
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of forward requests to {{ $labels.to }}.
message: CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of
forward requests to {{ $labels.to }}.
runbook_url: https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwarderrorshigh
expr: |
sum(rate(coredns_dns_response_rcode_count_total{job="kube-dns",rcode="SERVFAIL"}[5m]))

File diff suppressed because it is too large
View file
@ -56,7 +56,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdNoLeader
annotations:
message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.'
message: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no
leader.'
expr: |
etcd_server_has_leader{job=~".*etcd.*"} == 0
for: 1m
@ -69,7 +70,9 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfLeaderChanges
annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the
last 15 minutes. Frequent elections may be a sign of insufficient resources, high
network latency, or disruptions by other components and should be investigated.'
expr: |
increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4
for: 5m
@ -82,7 +85,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfFailedGRPCRequests
annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method
}} failed on etcd instance {{ $labels.instance }}.'
expr: |
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) without (grpc_type, grpc_code)
/
@ -98,7 +102,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfFailedGRPCRequests
annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method
}} failed on etcd instance {{ $labels.instance }}.'
expr: |
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) without (grpc_type, grpc_code)
/
@ -114,7 +119,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdGRPCRequestsSlow
annotations:
message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": gRPC requests to {{ $labels.grpc_method
}} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) without(grpc_type))
> 0.15
@ -128,7 +134,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdMemberCommunicationSlow
annotations:
message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To
}} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.15
@ -142,7 +149,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfFailedProposals
annotations:
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within
the last 30 minutes on etcd instance {{ $labels.instance }}.'
expr: |
rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
for: 15m
@ -155,7 +163,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighFsyncDurations
annotations:
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": 99th percentile fync durations are {{
$value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.5
@ -169,7 +178,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighCommitDurations
annotations:
message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.'
message: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{
$value }}s on etcd instance {{ $labels.instance }}.'
expr: |
histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
> 0.25
@ -183,7 +193,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfFailedHTTPRequests
annotations:
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}'
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance
{{ $labels.instance }}'
expr: |
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
without (code) > 0.01
@ -197,7 +208,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHighNumberOfFailedHTTPRequests
annotations:
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.'
message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance
{{ $labels.instance }}.'
expr: |
sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
without (code) > 0.05
@ -211,7 +223,8 @@ labels:
{{< code lang="yaml" >}}
alert: etcdHTTPRequestsSlow
annotations:
message: etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow.
message: etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method
}} are slow.
expr: |
histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
> 0.15
View file
@ -96,7 +96,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterBrickUtilization
annotations:
message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more than 80%
message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more
than 80%
expr: |
100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"}
/ gluster_brick_capacity_bytes_total{job="glusterd2-client"} > 80
@ -110,7 +111,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterBrickUtilization
annotations:
message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more than 90%
message: Gluster Brick {{$labels.host}}:{{$labels.brick_path}} Utilization more
than 90%
expr: |
100 * gluster_brick_capacity_used_bytes{job="glusterd2-client"}
/ gluster_brick_capacity_bytes_total{job="glusterd2-client"} > 90
@ -126,7 +128,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterThinpoolDataUtilization
annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than 80%
message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than
80%
expr: |
gluster_thinpool_data_used_bytes{job="glusterd2-client"} / gluster_thinpool_data_total_bytes{job="glusterd2-client"} > 0.8
for: 5m
@ -139,7 +142,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterThinpoolDataUtilization
annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than 90%
message: Gluster Thinpool {{ $labels.thinpool_name }} Data Utilization more than
90%
expr: |
gluster_thinpool_data_used_bytes{job="glusterd2-client"} / gluster_thinpool_data_total_bytes{job="glusterd2-client"} > 0.9
for: 5m
@ -152,7 +156,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterThinpoolMetadataUtilization
annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more than 80%
message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more
than 80%
expr: |
gluster_thinpool_metadata_used_bytes{job="glusterd2-client"} / gluster_thinpool_metadata_total_bytes{job="glusterd2-client"} > 0.8
for: 5m
@ -165,7 +170,8 @@ labels:
{{< code lang="yaml" >}}
alert: GlusterThinpoolMetadataUtilization
annotations:
message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more than 90%
message: Gluster Thinpool {{ $labels.thinpool_name }} Metadata Utilization more
than 90%
expr: |
gluster_thinpool_metadata_used_bytes{job="glusterd2-client"} / gluster_thinpool_metadata_total_bytes{job="glusterd2-client"} > 0.9
for: 5m
View file
@ -38,7 +38,9 @@ alert: JaegerAgentHTTPServerErrs
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is experiencing {{ printf "%.2f" $value }}% HTTP errors.
expr: 100 * sum(rate(jaeger_agent_http_server_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_agent_http_server_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_agent_http_server_errors_total[1m])) by (instance, job,
namespace) / sum(rate(jaeger_agent_http_server_total[1m])) by (instance, job, namespace)>
1
for: 15m
labels:
severity: warning
@ -51,7 +53,9 @@ alert: JaegerClientSpansDropped
annotations:
message: |
service {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
expr: 100 * sum(rate(jaeger_reporter_spans{result=~"dropped|err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_reporter_spans[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_reporter_spans{result=~"dropped|err"}[1m])) by (instance,
job, namespace) / sum(rate(jaeger_reporter_spans[1m])) by (instance, job, namespace)>
1
for: 15m
labels:
severity: warning
@ -64,7 +68,9 @@ alert: JaegerAgentSpansDropped
annotations:
message: |
agent {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
expr: 100 * sum(rate(jaeger_agent_reporter_batches_failures_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_agent_reporter_batches_submitted_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_agent_reporter_batches_failures_total[1m])) by (instance,
job, namespace) / sum(rate(jaeger_agent_reporter_batches_submitted_total[1m])) by
(instance, job, namespace)> 1
for: 15m
labels:
severity: warning
@ -90,7 +96,9 @@ alert: JaegerCollectorDroppingSpans
annotations:
message: |
collector {{ $labels.job }} {{ $labels.instance }} is dropping {{ printf "%.2f" $value }}% spans.
expr: 100 * sum(rate(jaeger_collector_spans_dropped_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_collector_spans_received_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_collector_spans_dropped_total[1m])) by (instance, job,
namespace) / sum(rate(jaeger_collector_spans_received_total[1m])) by (instance,
job, namespace)> 1
for: 15m
labels:
severity: warning
@ -103,7 +111,9 @@ alert: JaegerSamplingUpdateFailing
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is failing {{ printf "%.2f" $value }}% in updating sampling policies.
expr: 100 * sum(rate(jaeger_sampler_queries{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_sampler_queries[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_sampler_queries{result="err"}[1m])) by (instance, job,
namespace) / sum(rate(jaeger_sampler_queries[1m])) by (instance, job, namespace)>
1
for: 15m
labels:
severity: warning
@ -116,7 +126,8 @@ alert: JaegerCollectorPersistenceSlow
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is slow at persisting spans.
expr: histogram_quantile(0.99, sum by (le) (rate(jaeger_collector_save_latency_bucket[1m]))) > 0.5
expr: histogram_quantile(0.99, sum by (le) (rate(jaeger_collector_save_latency_bucket[1m])))
> 0.5
for: 15m
labels:
severity: warning
@ -129,7 +140,9 @@ alert: JaegerThrottlingUpdateFailing
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is failing {{ printf "%.2f" $value }}% in updating throttling policies.
expr: 100 * sum(rate(jaeger_throttler_updates{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_throttler_updates[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_throttler_updates{result="err"}[1m])) by (instance, job,
namespace) / sum(rate(jaeger_throttler_updates[1m])) by (instance, job, namespace)>
1
for: 15m
labels:
severity: warning
@ -142,7 +155,9 @@ alert: JaegerQueryReqsFailing
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
expr: 100 * sum(rate(jaeger_query_requests_total{result="err"}[1m])) by (instance, job, namespace) / sum(rate(jaeger_query_requests_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_query_requests_total{result="err"}[1m])) by (instance,
job, namespace) / sum(rate(jaeger_query_requests_total[1m])) by (instance, job,
namespace)> 1
for: 15m
labels:
severity: warning
@ -155,7 +170,9 @@ alert: JaegerCassandraWritesFailing
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
expr: 100 * sum(rate(jaeger_cassandra_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_cassandra_attempts_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_cassandra_errors_total[1m])) by (instance, job, namespace)
/ sum(rate(jaeger_cassandra_attempts_total[1m])) by (instance, job, namespace)>
1
for: 15m
labels:
severity: warning
@ -168,7 +185,9 @@ alert: JaegerCassandraReadsFailing
annotations:
message: |
{{ $labels.job }} {{ $labels.instance }} is seeing {{ printf "%.2f" $value }}% query errors on {{ $labels.operation }}.
expr: 100 * sum(rate(jaeger_cassandra_read_errors_total[1m])) by (instance, job, namespace) / sum(rate(jaeger_cassandra_read_attempts_total[1m])) by (instance, job, namespace)> 1
expr: 100 * sum(rate(jaeger_cassandra_read_errors_total[1m])) by (instance, job, namespace)
/ sum(rate(jaeger_cassandra_read_attempts_total[1m])) by (instance, job, namespace)>
1
for: 15m
labels:
severity: warning
View file
@ -23,7 +23,8 @@ Complete list of pregenerated alerts is available [here](https://github.com/moni
{{< code lang="yaml" >}}
alert: CockroachInstanceFlapping
annotations:
message: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted {{ $value }} time(s) in 10m'
message: '{{ $labels.instance }} for cluster {{ $labels.cluster }} restarted {{
$value }} time(s) in 10m'
expr: |
resets(cockroachdb_sys_uptime{job="cockroachdb-public"}[10m]) > 5
for: 1m
@ -64,7 +65,8 @@ labels:
{{< code lang="yaml" >}}
alert: CockroachStoreDiskLow
annotations:
message: Store {{ $labels.store }} on node {{ $labels.instance }} at {{ $value }} available disk fraction
message: Store {{ $labels.store }} on node {{ $labels.instance }} at {{ $value }}
available disk fraction
expr: |
:cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.15
for: 30m
@ -116,7 +118,8 @@ labels:
{{< code lang="yaml" >}}
alert: CockroachHighOpenFDCount
annotations:
message: 'Too many open file descriptors on {{ $labels.instance }}: {{ $value }} fraction used'
message: 'Too many open file descriptors on {{ $labels.instance }}: {{ $value }}
fraction used'
expr: |
cockroachdb_sys_fd_open{job="cockroachdb-public"} / cockroachdb_sys_fd_softlimit{job="cockroachdb-public"} > 0.8
for: 10m
View file
@ -23,7 +23,10 @@ Complete list of pregenerated alerts is available [here](https://github.com/moni
{{< code lang="yaml" >}}
alert: KubeStateMetricsListErrors
annotations:
message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
description: kube-state-metrics is experiencing errors at an elevated rate in list
operations. This is likely causing it to not be able to expose metrics about Kubernetes
objects correctly or at all.
summary: kube-state-metrics is experiencing errors in list operations.
expr: |
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
/
@ -39,7 +42,10 @@ labels:
{{< code lang="yaml" >}}
alert: KubeStateMetricsWatchErrors
annotations:
message: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
description: kube-state-metrics is experiencing errors at an elevated rate in watch
operations. This is likely causing it to not be able to expose metrics about Kubernetes
objects correctly or at all.
summary: kube-state-metrics is experiencing errors in watch operations.
expr: |
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
/
View file
@ -24,7 +24,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubePodCrashLooping
annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }})
is restarting {{ printf "%.2f" $value }} times / 5 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
expr: |
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
@ -39,7 +40,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubePodNotReady
annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state
for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
expr: |
sum by (namespace, pod) (
@ -60,7 +62,9 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeDeploymentGenerationMismatch
annotations:
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
}} does not match, this indicates that the Deployment has failed but has not been
rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
expr: |
kube_deployment_status_observed_generation{job="kube-state-metrics"}
@ -77,7 +81,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeDeploymentReplicasMismatch
annotations:
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched
the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
expr: |
(
@ -100,7 +105,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeStatefulSetReplicasMismatch
annotations:
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched
the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
expr: |
(
@ -123,7 +129,9 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeStatefulSetGenerationMismatch
annotations:
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
}} does not match, this indicates that the StatefulSet has failed but has not
been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
expr: |
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
@ -140,7 +148,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeStatefulSetUpdateNotRolledOut
annotations:
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has
not been rolled out.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
expr: |
(
@ -171,7 +180,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeDaemonSetRolloutStuck
annotations:
message: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes.
message: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished
or progressed for at least 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
expr: |
(
@ -208,7 +218,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeContainerWaiting
annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
has been in waiting state for longer than 1 hour.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
expr: |
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
@ -223,7 +234,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeDaemonSetNotScheduled
annotations:
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.'
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are not scheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
expr: |
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
@ -240,7 +252,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeDaemonSetMisScheduled
annotations:
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.'
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are running where they are not supposed to run.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
expr: |
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
@ -255,7 +268,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeJobCompletion
annotations:
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than 12 hours to complete.
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than
12 hours to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
expr: |
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
@ -285,7 +299,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeHpaReplicasMismatch
annotations:
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes.
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired
number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
expr: |
(kube_hpa_status_desired_replicas{job="kube-state-metrics"}
@ -304,7 +319,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeHpaMaxedOut
annotations:
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes.
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas
for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
expr: |
kube_hpa_status_current_replicas{job="kube-state-metrics"}
@ -323,7 +339,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeCPUOvercommit
annotations:
message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate
node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
expr: |
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
@ -342,7 +359,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeMemoryOvercommit
annotations:
message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
message: Cluster has overcommitted memory resource requests for Pods and cannot
tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
expr: |
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
@ -399,7 +417,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeQuotaFullyUsed
annotations:
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused
expr: |
kube_resourcequota{job="kube-state-metrics", type="used"}
@ -417,7 +436,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: CPUThrottlingHigh
annotations:
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace
}} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
expr: |
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
@ -437,7 +457,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubePersistentVolumeFillingUp
annotations:
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in
Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
expr: |
kubelet_volume_stats_available_bytes{job="kubelet"}
@ -455,7 +476,9 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubePersistentVolumeFillingUp
annotations:
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is expected to fill up within four days.
Currently {{ $value | humanizePercentage }} is available.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
expr: |
(
@ -476,7 +499,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubePersistentVolumeErrors
annotations:
message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.
message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase
}}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
expr: |
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
@ -493,7 +517,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeVersionMismatch
annotations:
message: There are {{ $value }} different semantic versions of Kubernetes components running.
message: There are {{ $value }} different semantic versions of Kubernetes components
running.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
expr: |
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*).*"))) > 1
@ -508,7 +533,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeClientErrors
annotations:
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}'
is experiencing {{ $value | humanizePercentage }} errors.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
expr: |
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
@ -606,7 +632,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeClientCertificateExpiration
annotations:
message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
message: A client certificate used to authenticate to the apiserver is expiring
in less than 7.0 days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800
@ -620,7 +647,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeClientCertificateExpiration
annotations:
message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
message: A client certificate used to authenticate to the apiserver is expiring
in less than 24.0 hours.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: |
apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400
@ -634,7 +662,9 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: AggregatedAPIErrors
annotations:
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported
errors. The number of errors have increased for it in the past five minutes. High
values indicate that the availability of the service changes too often.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
expr: |
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
@ -648,7 +678,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: AggregatedAPIDown
annotations:
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 5m.
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only
{{ $value | humanize }}% available over the last 5m.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
expr: |
(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[5m]))) * 100 < 90
@ -709,7 +740,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeletTooManyPods
annotations:
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
}} of its Pod capacity.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
expr: |
count by(node) (
@ -730,7 +762,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeNodeReadinessFlapping
annotations:
message: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.
message: The readiness status of node {{ $labels.node }} has changed {{ $value }}
times in the last 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
expr: |
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
@ -745,7 +778,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeletPlegDurationHigh
annotations:
message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.
message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration
of {{ $value }} seconds on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
expr: |
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
@ -760,7 +794,8 @@ https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md
{{< code lang="yaml" >}}
alert: KubeletPodStartUpLatencyHigh
annotations:
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on
node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
expr: |
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet"} > 60
View file
@ -23,7 +23,8 @@ Complete list of pregenerated alerts is available [here](https://github.com/moni
{{< code lang="yaml" >}}
alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
{{ printf "%.2f" $value }}% available space left and is filling up.
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |
(
@ -43,7 +44,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
{{ printf "%.2f" $value }}% available space left and is filling up fast.
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |
(
@ -63,7 +65,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
{{ printf "%.2f" $value }}% available space left.
summary: Filesystem has less than 5% space left.
expr: |
(
@ -81,7 +84,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
{{ printf "%.2f" $value }}% available space left.
summary: Filesystem has less than 3% space left.
expr: |
(
@ -99,7 +103,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
{{ printf "%.2f" $value }}% available inodes left and is filling up.
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: |
(
@ -119,7 +124,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
{{ printf "%.2f" $value }}% available inodes left and is filling up fast.
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: |
(
@ -139,7 +145,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
{{ printf "%.2f" $value }}% available inodes left.
summary: Filesystem has less than 5% inodes left.
expr: |
(
@ -157,7 +164,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
{{ printf "%.2f" $value }}% available inodes left.
summary: Filesystem has less than 3% inodes left.
expr: |
(
@ -175,7 +183,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeNetworkReceiveErrs
annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
summary: Network interface is reporting many receive errors.
expr: |
increase(node_network_receive_errs_total[2m]) > 10
@ -189,7 +198,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeNetworkTransmitErrs
annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
summary: Network interface is reporting many transmit errors.
expr: |
increase(node_network_transmit_errs_total[2m]) > 10
@ -229,7 +239,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeClockSkewDetected
annotations:
message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure
NTP is configured correctly on this host.
summary: Clock skew detected.
expr: |
(
@ -253,7 +264,8 @@ labels:
{{< code lang="yaml" >}}
alert: NodeClockNotSynchronising
annotations:
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured
on this host.
summary: Clock not synchronising.
expr: |
min_over_time(node_timex_sync_status[5m]) == 0
View file
@ -35,13 +35,15 @@ labels:
{{< /code >}}
##### PrometheusNotificationQueueRunningFull
Prometheus alert notification queue predicted to run full in less than 30m.
Prometheus alert notification queue predicted to run full in less than
{{< code lang="yaml" >}}
alert: PrometheusNotificationQueueRunningFull
annotations:
description: Alert notification queue of Prometheus {{$labels.instance}} is running full.
summary: Prometheus alert notification queue predicted to run full in less than 30m.
description: Alert notification queue of Prometheus {{$labels.instance}} is running
full.
summary: Prometheus alert notification queue predicted to run full in less than
30m.
expr: |
# Without min_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@ -56,14 +58,17 @@ labels:
{{< /code >}}
##### PrometheusErrorSendingAlertsToSomeAlertmanagers
'{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.'
Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
'{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus
Prometheus has encountered more than 1% errors sending alerts to a specific
{{< code lang="yaml" >}}
alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations:
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.'
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus
{{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.'
summary: Prometheus has encountered more than 1% errors sending alerts to a specific
Alertmanager.
expr: |
(
rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
@ -78,13 +83,14 @@ labels:
{{< /code >}}
##### PrometheusErrorSendingAlertsToAnyAlertmanager
'{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.instance}} to any Alertmanager.'
'{{ printf "%.1f" $value }}% minimum errors while sending alerts from
Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
{{< code lang="yaml" >}}
alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.instance}} to any Alertmanager.'
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from
Prometheus {{$labels.instance}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without(alertmanager) (
@ -120,7 +126,8 @@ labels:
{{< code lang="yaml" >}}
alert: PrometheusTSDBReloadsFailing
annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}} reload failures over the last 3h.
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
reload failures over the last 3h.
summary: Prometheus has issues reloading blocks from disk.
expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus"}[3h]) > 0
@ -134,7 +141,8 @@ labels:
{{< code lang="yaml" >}}
alert: PrometheusTSDBCompactionsFailing
annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}} compaction failures over the last 3h.
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
compaction failures over the last 3h.
summary: Prometheus has issues compacting blocks.
expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus"}[3h]) > 0
@ -162,7 +170,8 @@ labels:
{{< code lang="yaml" >}}
alert: PrometheusDuplicateTimestamps
annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
summary: Prometheus is dropping samples with duplicate timestamps.
expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus"}[5m]) > 0
@ -176,7 +185,8 @@ labels:
{{< code lang="yaml" >}}
alert: PrometheusOutOfOrderTimestamps
annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
summary: Prometheus drops samples with out-of-order timestamps.
expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus"}[5m]) > 0
@ -190,7 +200,8 @@ labels:
{{< code lang="yaml" >}}
alert: PrometheusRemoteStorageFailures
annotations:
description: Prometheus {{$labels.instance}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
summary: Prometheus fails to send samples to remote storage.
expr: |
(
@ -214,7 +225,8 @@ labels:
{{< code lang="yaml" >}}
alert: PrometheusRemoteWriteBehind
annotations:
description: Prometheus {{$labels.instance}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
summary: Prometheus remote write is behind.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
@ -235,8 +247,12 @@ labels:
{{< code lang="yaml" >}}
alert: PrometheusRemoteWriteDesiredShards
annotations:
description: Prometheus {{$labels.instance}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus"}` $labels.instance | query | first | value }}.
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@ -255,7 +271,8 @@ labels:
{{< code lang="yaml" >}}
alert: PrometheusRuleFailures
annotations:
description: Prometheus {{$labels.instance}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
summary: Prometheus is failing rule evaluations.
expr: |
increase(prometheus_rule_evaluation_failures_total{job="prometheus"}[5m]) > 0
@ -269,7 +286,8 @@ labels:
{{< code lang="yaml" >}}
alert: PrometheusMissingRuleEvaluations
annotations:
description: Prometheus {{$labels.instance}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: |
increase(prometheus_rule_group_iterations_missed_total{job="prometheus"}[5m]) > 0
@ -283,8 +301,10 @@ labels:
{{< code lang="yaml" >}}
alert: PrometheusTargetLimitHit
annotations:
description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit.
summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
expr: |
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus"}[5m]) > 0
for: 15m
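# Note (illustrative, not part of the generated rule): target_limit is set per
# scrape_config in prometheus.yml, for example `target_limit: 500` under a job;
# the counter in the expression above increments whenever a scrape pool exceeds
# that limit and its targets are dropped.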
@ -296,5 +316,5 @@ labels:
The following dashboards are generated from mixins and hosted on GitHub:
- [prometheus-remote-write](https://github.com/monitoring-mixins/website/blob/master/assets/prometheus/dashboards/prometheus-remote-write.json)
- [prometheus](https://github.com/monitoring-mixins/website/blob/master/assets/prometheus/dashboards/prometheus.json)

View file

@ -23,7 +23,8 @@ Complete list of pregenerated alerts is available [here](https://github.com/moni
{{< code lang="yaml" >}}
alert: ThanosCompactMultipleRunning
annotations:
message: No more than one Thanos Compact instance should be running at once. There are {{ $value }}
expr: sum(up{job=~"thanos-compact.*"}) > 1
for: 5m
labels:
@ -47,7 +48,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosCompactHighCompactionFailures
annotations:
message: Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize }}% of compactions.
expr: |
(
sum by (job) (rate(thanos_compact_group_compactions_failures_total{job=~"thanos-compact.*"}[5m]))
@ -65,7 +67,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosCompactBucketHighOperationFailures
annotations:
message: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations.
expr: |
(
sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-compact.*"}[5m]))
@ -84,7 +87,8 @@ labels:
alert: ThanosCompactHasNotRun
annotations:
message: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours.
expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) / 60 / 60 > 24
labels:
severity: warning
{{< /code >}}
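With made-up numbers: if the newest successful upload across all compactors happened 100000 seconds ago, the expression evaluates 100000 / 60 / 60 ≈ 27.8 hours, which exceeds 24, so the alert fires. The `max_over_time(...[24h])` wrapper means a brief scrape gap cannot hide an earlier successful upload.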
@ -96,7 +100,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosQueryHttpRequestQueryErrorRateHigh
annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query" requests.
expr: |
(
sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m]))
@ -113,7 +118,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosQueryHttpRequestQueryRangeErrorRateHigh
annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query_range" requests.
expr: |
(
sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m]))
@ -130,7 +136,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosQueryGrpcServerErrorRate
annotations:
message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.
expr: |
(
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*"}[5m]))
@ -148,7 +155,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosQueryGrpcClientErrorRate
annotations:
message: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize }}% of requests.
expr: |
(
sum by (job) (rate(grpc_client_handled_total{grpc_code!="OK", job=~"thanos-query.*"}[5m]))
@ -165,7 +173,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosQueryHighDNSFailures
annotations:
message: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing DNS queries for store endpoints.
expr: |
(
sum by (job) (rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m]))
@ -182,7 +191,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosQueryInstantLatencyHigh
annotations:
message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for instant queries.
expr: |
(
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m]))) > 40
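  # Illustrative note: `le` carries the cumulative bucket bounds of
  # http_request_duration_seconds, so summing by (job, le) keeps one set of
  # buckets per job and histogram_quantile() estimates a per-job 99th
  # percentile latency, compared here against 40 seconds.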
@ -199,7 +209,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosQueryRangeLatencyHigh
annotations:
message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for range queries.
expr: |
(
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query_range"}[5m]))) > 90
@ -218,7 +229,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosReceiveHttpRequestErrorRateHigh
annotations:
message: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.
expr: |
(
sum(rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m]))
@ -235,7 +247,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosReceiveHttpRequestLatencyHigh
annotations:
message: Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for requests.
expr: |
(
histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-receive.*", handler="receive"}[5m]))) > 10
@ -252,7 +265,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosReceiveHighReplicationFailures
annotations:
message: Thanos Receive {{$labels.job}} is failing to replicate {{ $value | humanize }}% of requests.
expr: |
thanos_receive_replication_factor > 1
and
@ -279,7 +293,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosReceiveHighForwardRequestFailures
annotations:
message: Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize }}% of requests.
expr: |
(
sum by (job) (rate(thanos_receive_forward_requests_total{result="error", job=~"thanos-receive.*"}[5m]))
@ -296,7 +311,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosReceiveHighHashringFileRefreshFailures
annotations:
message: Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{ $value | humanize }} of attempts failed.
expr: |
(
sum by (job) (rate(thanos_receive_hashrings_file_errors_total{job=~"thanos-receive.*"}[5m]))
@ -315,7 +331,8 @@ labels:
alert: ThanosReceiveConfigReloadFailure
annotations:
message: Thanos Receive {{$labels.job}} has not been able to reload hashring configurations.
expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) by (job) != 1
for: 5m
labels:
severity: warning
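# Worked example with hypothetical values: three receive instances reporting
# thanos_receive_config_last_reload_successful = 1, 1, 0 give a per-job average
# of (1 + 1 + 0) / 3 ≈ 0.67, which is != 1, so one instance failing to reload
# its hashring configuration is enough to fire the alert.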
@ -326,7 +343,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosReceiveNoUpload
annotations:
message: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded latest data to object storage.
expr: |
(up{job=~"thanos-receive.*"} - 1)
+ on (instance) # filters to only alert on current instance last 3h
@ -356,7 +374,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosSidecarUnhealthy
annotations:
message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{ $value }} seconds.
expr: |
time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"}) by (job, pod) >= 600
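  # i.e. the sidecar has not reported a successful heartbeat for at least
  # 600 seconds (10 minutes); the $value in the message is this age in seconds.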
labels:
@ -370,7 +389,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosStoreGrpcErrorRate
annotations:
message: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.
expr: |
(
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*"}[5m]))
@ -388,7 +408,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosStoreSeriesGateLatencyHigh
annotations:
message: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for store series gate requests.
expr: |
(
histogram_quantile(0.9, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2
@ -405,7 +426,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosStoreBucketHighOperationFailures
annotations:
message: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations.
expr: |
(
sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-store.*"}[5m]))
@ -423,7 +445,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosStoreObjstoreOperationLatencyHigh
annotations:
message: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of {{ $value }} seconds for the bucket operations.
expr: |
(
histogram_quantile(0.9, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2
@ -452,12 +475,13 @@ labels:
{{< /code >}}
##### ThanosRuleSenderIsFailingAlerts
Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts to alertmanager.
{{< code lang="yaml" >}}
alert: ThanosRuleSenderIsFailingAlerts
annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts to alertmanager.
expr: |
sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0
for: 5m
@ -488,7 +512,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosRuleHighRuleEvaluationWarnings
annotations:
message: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation warnings.
expr: |
sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0
for: 15m
@ -501,7 +526,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosRuleRuleEvaluationLatencyHigh
annotations:
message: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency than interval for {{$labels.rule_group}}.
expr: |
(
sum by (job, pod, rule_group) (prometheus_rule_group_last_duration_seconds{job=~"thanos-rule.*"})
@ -518,7 +544,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosRuleGrpcErrorRate
annotations:
message: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.
expr: |
(
sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m]))
@ -537,7 +564,8 @@ labels:
alert: ThanosRuleConfigReloadFailure
annotations:
message: Thanos Rule {{$labels.job}} has not been able to reload its configuration.
expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by (job) != 1
for: 5m
labels:
severity: info
@ -548,7 +576,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosRuleQueryHighDNSFailures
annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for query endpoints.
expr: |
(
sum by (job) (rate(thanos_ruler_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m]))
@ -566,7 +595,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosRuleAlertmanagerHighDNSFailures
annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for Alertmanager endpoints.
expr: |
(
sum by (job) (rate(thanos_ruler_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m]))
@ -584,7 +614,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosRuleNoEvaluationFor10Intervals
annotations:
message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups that did not evaluate for at least 10x of their expected interval.
expr: |
time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"})
>
@ -599,7 +630,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosNoRuleEvaluations
annotations:
message: Thanos Rule {{$labels.job}} did not perform any rule evaluations in the past 2 minutes.
expr: |
sum(rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[2m])) <= 0
and
@ -726,7 +758,8 @@ labels:
{{< code lang="yaml" >}}
alert: ThanosBucketReplicateRunLatency
annotations:
message: Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for the replicate operations.
expr: |
(
histogram_quantile(0.9, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20