
assets,site/content: daily assets regeneration

github-actions[bot] 2021-12-28 03:25:10 +00:00
parent 53269c316f
commit 313c81e897
19 changed files with 21291 additions and 4 deletions


@@ -1 +1,41 @@
null
groups:
- name: loki_alerts
rules:
- alert: LokiRequestErrors
annotations:
message: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
expr: |
100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route)
/
sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
> 10
for: 15m
labels:
severity: critical
- alert: LokiRequestPanics
annotations:
message: |
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
expr: |
sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
labels:
severity: critical
- alert: LokiRequestLatency
annotations:
message: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
expr: |
namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*"} > 1
for: 15m
labels:
severity: critical
- alert: LokiTooManyCompactorsRunning
annotations:
message: |
{{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
expr: |
sum(loki_boltdb_shipper_compactor_running) by (namespace) > 1
for: 5m
labels:
severity: warning

File diff suppressed because it is too large


@@ -0,0 +1,544 @@
{
"annotations": {
"list": [ ]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"loki"
],
"targetBlank": false,
"title": "Loki Dashboards",
"type": "dashboards"
}
],
"refresh": "10s",
"rows": [
{
"collapse": false,
"height": "100px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"format": "none",
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(loki_compactor_pending_delete_requests_count{cluster=~\"$cluster\", namespace=~\"$namespace\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
"thresholds": "70,80",
"timeFrom": null,
"timeShift": null,
"title": "Number of Pending Requests",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"format": "dtdurations",
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
"refId": "A"
}
],
"thresholds": "70,80",
"timeFrom": null,
"timeShift": null,
"title": "Oldest Pending Request Age",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "singlestat",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Headlines",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "received",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Delete Requests Received / Day",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(increase(loki_compactor_delete_requests_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "processed",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Delete Requests Processed / Day",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Churn",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [ ],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "failures",
"legendLink": null,
"step": 10
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Failures in Loading Delete Requests / Hour",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Failures",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"loki"
],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [ ],
"query": "label_values(loki_build_info, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "prod",
"value": "prod"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [ ],
"query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "Loki / Deletion",
"uid": "deletion",
"version": 0
}

File diff suppressed because it is too large


@@ -0,0 +1,657 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [ ],
"type": "dashboard"
},
"type": "dashboard"
},
{
"datasource": "${datasource}",
"enable": false,
"expr": "sum by (tenant) (changes(loki_ruler_wal_prometheus_tsdb_wal_truncations_total{tenant=~\"${tenant}\"}[$__rate_interval]))",
"iconColor": "red",
"name": "WAL Truncations",
"target": {
"queryType": "Azure Monitor",
"refId": "Anno"
},
"titleFormat": "{{tenant}}"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": null,
"graphTooltip": 0,
"iteration": 1635347545534,
"links": [ ],
"liveNow": false,
"panels": [
{
"datasource": "${datasource}",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ],
"noValue": "0",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 2,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.3.0-38205pre",
"targets": [
{
"datasource": "${datasource}",
"exemplar": false,
"expr": "sum(loki_ruler_wal_appender_ready) by (pod, tenant) == 0",
"instant": true,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Appenders Not Ready",
"type": "stat"
},
{
"datasource": "${datasource}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 11,
"x": 2,
"y": 0
},
"id": 4,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum(rate(loki_ruler_wal_samples_appended_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Samples Appended to WAL per Second",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Series are unique combinations of labels",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 11,
"x": 13,
"y": 0
},
"id": 5,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum(rate(loki_ruler_wal_storage_created_series_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Series Created per Second",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Difference between highest timestamp appended to WAL and highest timestamp successfully written to remote storage",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 10
},
"id": 6,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{tenant=~\"${tenant}\"}\n or vector(0)\n )",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Write Behind",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 10
},
"id": 7,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Samples Sent per Second",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 20
},
"id": 8,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "sum by (tenant) (loki_ruler_wal_disk_size{tenant=~\"${tenant}\"})",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "WAL Disk Size",
"type": "timeseries"
},
{
"datasource": "${datasource}",
"description": "Some number of pending samples is expected, but if remote-write is failing this value will remain high",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 20
},
"id": 9,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": "${datasource}",
"exemplar": true,
"expr": "max(loki_ruler_wal_prometheus_remote_storage_samples_pending{tenant=~\"${tenant}\"}) by (tenant,pod) > 0",
"interval": "",
"legendFormat": "{{tenant}}",
"refId": "A"
}
],
"title": "Pending Samples",
"type": "timeseries"
}
],
"schemaVersion": 31,
"style": "dark",
"tags": [ ],
"templating": {
"list": [
{
"description": null,
"error": null,
"hide": 0,
"includeAll": false,
"label": "Datasource",
"multi": false,
"name": "datasource",
"options": [ ],
"query": "prometheus",
"queryValue": "",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": null,
"datasource": "${datasource}",
"definition": "label_values(loki_ruler_wal_samples_appended_total, tenant)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": "Tenant",
"multi": true,
"name": "tenant",
"options": [ ],
"query": {
"query": "label_values(loki_ruler_wal_samples_appended_total, tenant)",
"refId": "StandardVariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": { },
"timezone": "",
"title": "Recording Rules",
"uid": "2xKA_ZK7k",
"version": 9,
"weekStart": ""
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1 +1,49 @@
null
groups:
- name: loki_rules
rules:
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job))
record: job:loki_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job))
record: job:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job) / sum(rate(loki_request_duration_seconds_count[1m]))
by (job)
record: job:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job)
record: job:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job)
record: job:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (job)
record: job:loki_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job, route))
record: job_route:loki_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job, route))
record: job_route:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route) / sum(rate(loki_request_duration_seconds_count[1m]))
by (job, route)
record: job_route:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route)
record: job_route:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route)
record: job_route:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)
record: job_route:loki_request_duration_seconds_count:sum_rate
- expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, namespace, job, route))
record: namespace_job_route:loki_request_duration_seconds:99quantile
- expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, namespace, job, route))
record: namespace_job_route:loki_request_duration_seconds:50quantile
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)
/ sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds:avg
- expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job,
route)
record: namespace_job_route:loki_request_duration_seconds_bucket:sum_rate
- expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds_sum:sum_rate
- expr: sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds_count:sum_rate


@@ -1 +1,87 @@
null
groups:
- name: prometheus-operator
rules:
- alert: PrometheusOperatorListErrors
annotations:
description: Errors while performing List operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
summary: Errors while performing list operations in controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator"}[10m]))) > 0.4
for: 15m
labels:
severity: warning
- alert: PrometheusOperatorWatchErrors
annotations:
description: Errors while performing watch operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
summary: Errors while performing watch operations in controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator"}[10m]))) > 0.4
for: 15m
labels:
severity: warning
- alert: PrometheusOperatorSyncFailed
annotations:
description: Controller {{ $labels.controller }} in {{ $labels.namespace }}
namespace fails to reconcile {{ $value }} objects.
summary: Last controller reconciliation failed
expr: |
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator"}[5m]) > 0
for: 10m
labels:
severity: warning
- alert: PrometheusOperatorReconcileErrors
annotations:
description: '{{ $value | humanizePercentage }} of reconciling operations failed
for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
summary: Errors while reconciling controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator"}[5m]))) > 0.1
for: 10m
labels:
severity: warning
- alert: PrometheusOperatorNodeLookupErrors
annotations:
description: Errors while reconciling Prometheus in {{ $labels.namespace }}
Namespace.
summary: Errors while reconciling Prometheus.
expr: |
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator"}[5m]) > 0.1
for: 10m
labels:
severity: warning
- alert: PrometheusOperatorNotReady
annotations:
description: Prometheus operator in {{ $labels.namespace }} namespace isn't
ready to reconcile {{ $labels.controller }} resources.
summary: Prometheus operator not ready
expr: |
min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator"}[5m]) == 0)
for: 5m
labels:
severity: warning
- alert: PrometheusOperatorRejectedResources
annotations:
description: Prometheus operator in {{ $labels.namespace }} namespace rejected
{{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }}
resources.
summary: Resources rejected by Prometheus operator
expr: |
min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-operator"}[5m]) > 0
for: 5m
labels:
severity: warning
- name: config-reloaders
rules:
- alert: ConfigReloaderSidecarErrors
annotations:
description: |-
Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.
As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.
summary: config-reloader sidecar has not had a successful reload for 10m
expr: |
max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0
for: 10m
labels:
severity: warning


@@ -1 +1,241 @@
null
groups:
- name: prometheus
rules:
- alert: PrometheusBadConfig
annotations:
description: Prometheus {{$labels.instance}} has failed to reload its configuration.
summary: Failed Prometheus configuration reload.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(prometheus_config_last_reload_successful{job="prometheus"}[5m]) == 0
for: 10m
labels:
severity: critical
- alert: PrometheusNotificationQueueRunningFull
annotations:
description: Alert notification queue of Prometheus {{$labels.instance}} is
running full.
summary: Prometheus alert notification queue predicted to run full in less than
30m.
expr: |
# Without min_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
predict_linear(prometheus_notifications_queue_length{job="prometheus"}[5m], 60 * 30)
>
min_over_time(prometheus_notifications_queue_capacity{job="prometheus"}[5m])
)
for: 15m
labels:
severity: warning
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations:
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus
{{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.'
summary: Prometheus has encountered more than 1% errors sending alerts to a
specific Alertmanager.
expr: |
(
rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus"}[5m])
)
* 100
> 1
for: 15m
labels:
severity: warning
- alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{$labels.instance}} is not connected to any Alertmanagers.
summary: Prometheus is not connected to any Alertmanagers.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus"}[5m]) < 1
for: 10m
labels:
severity: warning
- alert: PrometheusTSDBReloadsFailing
annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
reload failures over the last 3h.
summary: Prometheus has issues reloading blocks from disk.
expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus"}[3h]) > 0
for: 4h
labels:
severity: warning
- alert: PrometheusTSDBCompactionsFailing
annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
compaction failures over the last 3h.
summary: Prometheus has issues compacting blocks.
expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus"}[3h]) > 0
for: 4h
labels:
severity: warning
- alert: PrometheusNotIngestingSamples
annotations:
description: Prometheus {{$labels.instance}} is not ingesting samples.
summary: Prometheus is not ingesting samples.
expr: |
(
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus"}[5m]) <= 0
and
(
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus"}) > 0
or
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus"}) > 0
)
)
for: 10m
labels:
severity: warning
- alert: PrometheusDuplicateTimestamps
annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }}
samples/s with different values but duplicated timestamp.
summary: Prometheus is dropping samples with duplicate timestamps.
expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus"}[5m]) > 0
for: 10m
labels:
severity: warning
- alert: PrometheusOutOfOrderTimestamps
annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }}
samples/s with timestamps arriving out of order.
summary: Prometheus drops samples with out-of-order timestamps.
expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus"}[5m]) > 0
for: 10m
labels:
severity: warning
- alert: PrometheusRemoteStorageFailures
annotations:
description: Prometheus {{$labels.instance}} failed to send {{ printf "%.1f"
$value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
summary: Prometheus fails to send samples to remote storage.
expr: |
(
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus"}[5m]))
/
(
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus"}[5m]))
+
(rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus"}[5m]))
)
)
* 100
> 1
for: 15m
labels:
severity: critical
- alert: PrometheusRemoteWriteBehind
annotations:
description: Prometheus {{$labels.instance}} remote write is {{ printf "%.1f"
$value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
summary: Prometheus remote write is behind.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus"}[5m])
- ignoring(remote_name, url) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus"}[5m])
)
> 120
for: 15m
labels:
severity: critical
- alert: PrometheusRemoteWriteDesiredShards
annotations:
description: Prometheus {{$labels.instance}} remote write desired shards calculation
wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url
}}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus"}`
$labels.instance | query | first | value }}.
summary: Prometheus remote write desired shards calculation wants to run more
than configured max shards.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus"}[5m])
>
max_over_time(prometheus_remote_storage_shards_max{job="prometheus"}[5m])
)
for: 15m
labels:
severity: warning
- alert: PrometheusRuleFailures
annotations:
description: Prometheus {{$labels.instance}} has failed to evaluate {{ printf
"%.0f" $value }} rules in the last 5m.
summary: Prometheus is failing rule evaluations.
expr: |
increase(prometheus_rule_evaluation_failures_total{job="prometheus"}[5m]) > 0
for: 15m
labels:
severity: critical
- alert: PrometheusMissingRuleEvaluations
annotations:
description: Prometheus {{$labels.instance}} has missed {{ printf "%.0f" $value
}} rule group evaluations in the last 5m.
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: |
increase(prometheus_rule_group_iterations_missed_total{job="prometheus"}[5m]) > 0
for: 15m
labels:
severity: warning
- alert: PrometheusTargetLimitHit
annotations:
description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value
}} targets because the number of targets exceeded the configured target_limit.
summary: Prometheus has dropped targets because some scrape configs have exceeded
the targets limit.
expr: |
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus"}[5m]) > 0
for: 15m
labels:
severity: warning
- alert: PrometheusLabelLimitHit
annotations:
description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value
}} targets because some samples exceeded the configured label_limit, label_name_length_limit
or label_value_length_limit.
summary: Prometheus has dropped targets because some scrape configs have exceeded
the labels limit.
expr: |
increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus"}[5m]) > 0
for: 15m
labels:
severity: warning
- alert: PrometheusTargetSyncFailure
annotations:
description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.instance}}
have failed to sync because invalid configuration was supplied.'
summary: Prometheus has failed to sync targets.
expr: |
increase(prometheus_target_sync_failed_total{job="prometheus"}[30m]) > 0
for: 5m
labels:
severity: critical
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
from Prometheus {{$labels.instance}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without (alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus",alertmanager!~``}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus",alertmanager!~``}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -10,3 +10,227 @@ title: loki
Jsonnet source code is available at [github.com/grafana/loki](https://github.com/grafana/loki/tree/master/production/loki-mixin)
{{< /panel >}}
## Alerts
{{< panel style="warning" >}}
The complete list of pregenerated alerts is available [here](https://github.com/monitoring-mixins/website/blob/master/assets/loki/alerts.yaml).
{{< /panel >}}
### loki_alerts
##### LokiRequestErrors
{{< code lang="yaml" >}}
alert: LokiRequestErrors
annotations:
message: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
expr: |
100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route)
/
sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
> 10
for: 15m
labels:
severity: critical
{{< /code >}}
##### LokiRequestPanics
{{< code lang="yaml" >}}
alert: LokiRequestPanics
annotations:
message: |
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
expr: |
sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
labels:
severity: critical
{{< /code >}}
##### LokiRequestLatency
{{< code lang="yaml" >}}
alert: LokiRequestLatency
annotations:
message: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
expr: |
namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*"} > 1
for: 15m
labels:
severity: critical
{{< /code >}}
##### LokiTooManyCompactorsRunning
{{< code lang="yaml" >}}
alert: LokiTooManyCompactorsRunning
annotations:
message: |
{{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
expr: |
sum(loki_boltdb_shipper_compactor_running) by (namespace) > 1
for: 5m
labels:
severity: warning
{{< /code >}}
## Recording rules
{{< panel style="warning" >}}
The complete list of pregenerated recording rules is available [here](https://github.com/monitoring-mixins/website/blob/master/assets/loki/rules.yaml).
{{< /panel >}}
### loki_rules
##### job:loki_request_duration_seconds:99quantile
{{< code lang="yaml" >}}
expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job))
record: job:loki_request_duration_seconds:99quantile
{{< /code >}}
##### job:loki_request_duration_seconds:50quantile
{{< code lang="yaml" >}}
expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job))
record: job:loki_request_duration_seconds:50quantile
{{< /code >}}
##### job:loki_request_duration_seconds:avg
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job) / sum(rate(loki_request_duration_seconds_count[1m]))
by (job)
record: job:loki_request_duration_seconds:avg
{{< /code >}}
##### job:loki_request_duration_seconds_bucket:sum_rate
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job)
record: job:loki_request_duration_seconds_bucket:sum_rate
{{< /code >}}
##### job:loki_request_duration_seconds_sum:sum_rate
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job)
record: job:loki_request_duration_seconds_sum:sum_rate
{{< /code >}}
##### job:loki_request_duration_seconds_count:sum_rate
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_count[1m])) by (job)
record: job:loki_request_duration_seconds_count:sum_rate
{{< /code >}}
##### job_route:loki_request_duration_seconds:99quantile
{{< code lang="yaml" >}}
expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job, route))
record: job_route:loki_request_duration_seconds:99quantile
{{< /code >}}
##### job_route:loki_request_duration_seconds:50quantile
{{< code lang="yaml" >}}
expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, job, route))
record: job_route:loki_request_duration_seconds:50quantile
{{< /code >}}
##### job_route:loki_request_duration_seconds:avg
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route) / sum(rate(loki_request_duration_seconds_count[1m]))
by (job, route)
record: job_route:loki_request_duration_seconds:avg
{{< /code >}}
##### job_route:loki_request_duration_seconds_bucket:sum_rate
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route)
record: job_route:loki_request_duration_seconds_bucket:sum_rate
{{< /code >}}
##### job_route:loki_request_duration_seconds_sum:sum_rate
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route)
record: job_route:loki_request_duration_seconds_sum:sum_rate
{{< /code >}}
##### job_route:loki_request_duration_seconds_count:sum_rate
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)
record: job_route:loki_request_duration_seconds_count:sum_rate
{{< /code >}}
##### namespace_job_route:loki_request_duration_seconds:99quantile
{{< code lang="yaml" >}}
expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, namespace, job, route))
record: namespace_job_route:loki_request_duration_seconds:99quantile
{{< /code >}}
##### namespace_job_route:loki_request_duration_seconds:50quantile
{{< code lang="yaml" >}}
expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m]))
by (le, namespace, job, route))
record: namespace_job_route:loki_request_duration_seconds:50quantile
{{< /code >}}
##### namespace_job_route:loki_request_duration_seconds:avg
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)
/ sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds:avg
{{< /code >}}
##### namespace_job_route:loki_request_duration_seconds_bucket:sum_rate
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job,
route)
record: namespace_job_route:loki_request_duration_seconds_bucket:sum_rate
{{< /code >}}
##### namespace_job_route:loki_request_duration_seconds_sum:sum_rate
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds_sum:sum_rate
{{< /code >}}
##### namespace_job_route:loki_request_duration_seconds_count:sum_rate
{{< code lang="yaml" >}}
expr: sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
record: namespace_job_route:loki_request_duration_seconds_count:sum_rate
{{< /code >}}
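Both of the files above are plain Prometheus rule files. Note that the `LokiRequestLatency` alert evaluates the `namespace_job_route:loki_request_duration_seconds:99quantile` series, so the alerts only fire correctly when the recording rules are loaded alongside them. A minimal sketch of wiring both into a Prometheus server (the file paths are illustrative, not part of the mixin):
{{< code lang="yaml" >}}
# Illustrative prometheus.yml fragment; the /etc/prometheus/loki/ paths are assumptions.
rule_files:
  - /etc/prometheus/loki/rules.yaml   # recording rules, e.g. namespace_job_route:loki_request_duration_seconds:99quantile
  - /etc/prometheus/loki/alerts.yaml  # alert rules such as LokiRequestErrors and LokiRequestLatency
{{< /code >}}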
## Dashboards
The following dashboards are generated from mixins and hosted on GitHub:
- [loki-chunks](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-chunks.json)
- [loki-deletion](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-deletion.json)
- [loki-logs](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-logs.json)
- [loki-mixin-recording-rules](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-mixin-recording-rules.json)
- [loki-operational](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-operational.json)
- [loki-reads-resources](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-reads-resources.json)
- [loki-reads](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-reads.json)
- [loki-retention](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-retention.json)
- [loki-writes-resources](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-writes-resources.json)
- [loki-writes](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-writes.json)
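These dashboards are plain Grafana JSON. One way to load them is a file-based provisioning provider pointed at a directory containing the JSON files; a minimal sketch (the provider name and paths are assumptions):
{{< code lang="yaml" >}}
# Illustrative Grafana provisioning file, e.g. provisioning/dashboards/loki.yaml;
# the provider name and dashboard directory are assumptions.
apiVersion: 1
providers:
  - name: loki-mixin
    folder: Loki
    type: file
    options:
      # Directory holding the JSON dashboards listed above.
      path: /var/lib/grafana/dashboards/loki
{{< /code >}}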


@@ -10,3 +10,133 @@ title: prometheus-operator
Jsonnet source code is available at [github.com/prometheus-operator/prometheus-operator](https://github.com/prometheus-operator/prometheus-operator/tree/master/jsonnet/mixin)
{{< /panel >}}
## Alerts
{{< panel style="warning" >}}
The complete list of pregenerated alerts is available [here](https://github.com/monitoring-mixins/website/blob/master/assets/prometheus-operator/alerts.yaml).
{{< /panel >}}
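Since these alerts target the Prometheus Operator, they are often loaded as a `PrometheusRule` custom resource rather than a raw rule file. A minimal sketch wrapping the first alert below (the resource name and labels are assumptions; the expression is taken verbatim from the generated file):
{{< code lang="yaml" >}}
# Hypothetical PrometheusRule manifest; metadata.name and labels are assumptions.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: prometheus-operator-mixin
  labels:
    role: alert-rules
spec:
  groups:
  - name: prometheus-operator
    rules:
    - alert: PrometheusOperatorListErrors
      annotations:
        summary: Errors while performing list operations in controller.
      expr: |
        (sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator"}[10m]))) > 0.4
      for: 15m
      labels:
        severity: warning
{{< /code >}}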
### prometheus-operator
##### PrometheusOperatorListErrors
{{< code lang="yaml" >}}
alert: PrometheusOperatorListErrors
annotations:
description: Errors while performing List operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
summary: Errors while performing list operations in controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator"}[10m]))) > 0.4
for: 15m
labels:
severity: warning
{{< /code >}}
##### PrometheusOperatorWatchErrors
{{< code lang="yaml" >}}
alert: PrometheusOperatorWatchErrors
annotations:
description: Errors while performing watch operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
summary: Errors while performing watch operations in controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator"}[10m]))) > 0.4
for: 15m
labels:
severity: warning
{{< /code >}}
##### PrometheusOperatorSyncFailed
{{< code lang="yaml" >}}
alert: PrometheusOperatorSyncFailed
annotations:
description: Controller {{ $labels.controller }} in {{ $labels.namespace }} namespace
fails to reconcile {{ $value }} objects.
summary: Last controller reconciliation failed
expr: |
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator"}[5m]) > 0
for: 10m
labels:
severity: warning
{{< /code >}}
##### PrometheusOperatorReconcileErrors
{{< code lang="yaml" >}}
alert: PrometheusOperatorReconcileErrors
annotations:
description: '{{ $value | humanizePercentage }} of reconciling operations failed
for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
summary: Errors while reconciling controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator"}[5m]))) > 0.1
for: 10m
labels:
severity: warning
{{< /code >}}
##### PrometheusOperatorNodeLookupErrors
{{< code lang="yaml" >}}
alert: PrometheusOperatorNodeLookupErrors
annotations:
description: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
summary: Errors while reconciling Prometheus.
expr: |
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator"}[5m]) > 0.1
for: 10m
labels:
severity: warning
{{< /code >}}
##### PrometheusOperatorNotReady
{{< code lang="yaml" >}}
alert: PrometheusOperatorNotReady
annotations:
description: Prometheus operator in {{ $labels.namespace }} namespace isn't ready
to reconcile {{ $labels.controller }} resources.
summary: Prometheus operator not ready
expr: |
min by(namespace, controller) (max_over_time(prometheus_operator_ready{job="prometheus-operator"}[5m]) == 0)
for: 5m
labels:
severity: warning
{{< /code >}}
##### PrometheusOperatorRejectedResources
{{< code lang="yaml" >}}
alert: PrometheusOperatorRejectedResources
annotations:
description: Prometheus operator in {{ $labels.namespace }} namespace rejected {{
printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources.
summary: Resources rejected by Prometheus operator
expr: |
min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-operator"}[5m]) > 0
for: 5m
labels:
severity: warning
{{< /code >}}
### config-reloaders
##### ConfigReloaderSidecarErrors
{{< code lang="yaml" >}}
alert: ConfigReloaderSidecarErrors
annotations:
description: |-
Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.
As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.
summary: config-reloader sidecar has not had a successful reload for 10m
expr: |
max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0
for: 10m
labels:
severity: warning
{{< /code >}}


@@ -10,3 +10,351 @@ The Prometheus Mixin is a set of configurable, reusable, and extensible alerts a
Jsonnet source code is available at [github.com/prometheus/prometheus](https://github.com/prometheus/prometheus/tree/master/documentation/prometheus-mixin)
{{< /panel >}}
## Alerts
{{< panel style="warning" >}}
The complete list of pregenerated alerts is available [here](https://github.com/monitoring-mixins/website/blob/master/assets/prometheus/alerts.yaml).
{{< /panel >}}
### prometheus
##### PrometheusBadConfig
{{< code lang="yaml" >}}
alert: PrometheusBadConfig
annotations:
description: Prometheus {{$labels.instance}} has failed to reload its configuration.
summary: Failed Prometheus configuration reload.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(prometheus_config_last_reload_successful{job="prometheus"}[5m]) == 0
for: 10m
labels:
severity: critical
{{< /code >}}
##### PrometheusNotificationQueueRunningFull
{{< code lang="yaml" >}}
alert: PrometheusNotificationQueueRunningFull
annotations:
description: Alert notification queue of Prometheus {{$labels.instance}} is running
full.
summary: Prometheus alert notification queue predicted to run full in less than
30m.
expr: |
# Without min_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
predict_linear(prometheus_notifications_queue_length{job="prometheus"}[5m], 60 * 30)
>
min_over_time(prometheus_notifications_queue_capacity{job="prometheus"}[5m])
)
for: 15m
labels:
severity: warning
{{< /code >}}
##### PrometheusErrorSendingAlertsToSomeAlertmanagers
'{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus
Prometheus has encountered more than 1% errors sending alerts to a specific
{{< code lang="yaml" >}}
alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations:
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus
{{$labels.instance}} to Alertmanager {{$labels.alertmanager}}.'
summary: Prometheus has encountered more than 1% errors sending alerts to a specific
Alertmanager.
expr: |
(
rate(prometheus_notifications_errors_total{job="prometheus"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus"}[5m])
)
* 100
> 1
for: 15m
labels:
severity: warning
{{< /code >}}
##### PrometheusNotConnectedToAlertmanagers
{{< code lang="yaml" >}}
alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{$labels.instance}} is not connected to any Alertmanagers.
summary: Prometheus is not connected to any Alertmanagers.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus"}[5m]) < 1
for: 10m
labels:
severity: warning
{{< /code >}}
##### PrometheusTSDBReloadsFailing
{{< code lang="yaml" >}}
alert: PrometheusTSDBReloadsFailing
annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
reload failures over the last 3h.
summary: Prometheus has issues reloading blocks from disk.
expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus"}[3h]) > 0
for: 4h
labels:
severity: warning
{{< /code >}}
##### PrometheusTSDBCompactionsFailing
{{< code lang="yaml" >}}
alert: PrometheusTSDBCompactionsFailing
annotations:
description: Prometheus {{$labels.instance}} has detected {{$value | humanize}}
compaction failures over the last 3h.
summary: Prometheus has issues compacting blocks.
expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus"}[3h]) > 0
for: 4h
labels:
severity: warning
{{< /code >}}
##### PrometheusNotIngestingSamples
{{< code lang="yaml" >}}
alert: PrometheusNotIngestingSamples
annotations:
description: Prometheus {{$labels.instance}} is not ingesting samples.
summary: Prometheus is not ingesting samples.
expr: |
(
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus"}[5m]) <= 0
and
(
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus"}) > 0
or
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus"}) > 0
)
)
for: 10m
labels:
severity: warning
{{< /code >}}
##### PrometheusDuplicateTimestamps
{{< code lang="yaml" >}}
alert: PrometheusDuplicateTimestamps
annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }}
samples/s with different values but duplicated timestamp.
summary: Prometheus is dropping samples with duplicate timestamps.
expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus"}[5m]) > 0
for: 10m
labels:
severity: warning
{{< /code >}}
##### PrometheusOutOfOrderTimestamps
{{< code lang="yaml" >}}
alert: PrometheusOutOfOrderTimestamps
annotations:
description: Prometheus {{$labels.instance}} is dropping {{ printf "%.4g" $value }}
samples/s with timestamps arriving out of order.
summary: Prometheus drops samples with out-of-order timestamps.
expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus"}[5m]) > 0
for: 10m
labels:
severity: warning
{{< /code >}}
##### PrometheusRemoteStorageFailures
{{< code lang="yaml" >}}
alert: PrometheusRemoteStorageFailures
annotations:
description: Prometheus {{$labels.instance}} failed to send {{ printf "%.1f" $value
}}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
summary: Prometheus fails to send samples to remote storage.
expr: |
(
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus"}[5m]))
/
(
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus"}[5m]))
+
(rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus"}[5m]))
)
)
* 100
> 1
for: 15m
labels:
severity: critical
{{< /code >}}
##### PrometheusRemoteWriteBehind
{{< code lang="yaml" >}}
alert: PrometheusRemoteWriteBehind
annotations:
description: Prometheus {{$labels.instance}} remote write is {{ printf "%.1f" $value
}}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
summary: Prometheus remote write is behind.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus"}[5m])
- ignoring(remote_name, url) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus"}[5m])
)
> 120
for: 15m
labels:
severity: critical
{{< /code >}}
##### PrometheusRemoteWriteDesiredShards
{{< code lang="yaml" >}}
alert: PrometheusRemoteWriteDesiredShards
annotations:
description: Prometheus {{$labels.instance}} remote write desired shards calculation
wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url
}}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus"}`
$labels.instance | query | first | value }}.
summary: Prometheus remote write desired shards calculation wants to run more than
configured max shards.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus"}[5m])
>
max_over_time(prometheus_remote_storage_shards_max{job="prometheus"}[5m])
)
for: 15m
labels:
severity: warning
{{< /code >}}
##### PrometheusRuleFailures
{{< code lang="yaml" >}}
alert: PrometheusRuleFailures
annotations:
description: Prometheus {{$labels.instance}} has failed to evaluate {{ printf "%.0f"
$value }} rules in the last 5m.
summary: Prometheus is failing rule evaluations.
expr: |
increase(prometheus_rule_evaluation_failures_total{job="prometheus"}[5m]) > 0
for: 15m
labels:
severity: critical
{{< /code >}}
##### PrometheusMissingRuleEvaluations
{{< code lang="yaml" >}}
alert: PrometheusMissingRuleEvaluations
annotations:
description: Prometheus {{$labels.instance}} has missed {{ printf "%.0f" $value
}} rule group evaluations in the last 5m.
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: |
increase(prometheus_rule_group_iterations_missed_total{job="prometheus"}[5m]) > 0
for: 15m
labels:
severity: warning
{{< /code >}}
##### PrometheusTargetLimitHit
{{< code lang="yaml" >}}
alert: PrometheusTargetLimitHit
annotations:
description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value
}} targets because the number of targets exceeded the configured target_limit.
summary: Prometheus has dropped targets because some scrape configs have exceeded
the targets limit.
expr: |
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus"}[5m]) > 0
for: 15m
labels:
severity: warning
{{< /code >}}
##### PrometheusLabelLimitHit
{{< code lang="yaml" >}}
alert: PrometheusLabelLimitHit
annotations:
description: Prometheus {{$labels.instance}} has dropped {{ printf "%.0f" $value
}} targets because some samples exceeded the configured label_limit, label_name_length_limit
or label_value_length_limit.
summary: Prometheus has dropped targets because some scrape configs have exceeded
the labels limit.
expr: |
increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus"}[5m]) > 0
for: 15m
labels:
severity: warning
{{< /code >}}
##### PrometheusTargetSyncFailure
{{< code lang="yaml" >}}
alert: PrometheusTargetSyncFailure
annotations:
description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.instance}}
have failed to sync because invalid configuration was supplied.'
summary: Prometheus has failed to sync targets.
expr: |
increase(prometheus_target_sync_failed_total{job="prometheus"}[30m]) > 0
for: 5m
labels:
severity: critical
{{< /code >}}
##### PrometheusErrorSendingAlertsToAnyAlertmanager
'{{ printf "%.1f" $value }}% minimum errors while sending alerts from
Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
{{< code lang="yaml" >}}
alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from
Prometheus {{$labels.instance}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without (alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus",alertmanager!~``}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus",alertmanager!~``}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical
{{< /code >}}
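Every alert above carries a `severity` label of either `warning` or `critical`. A minimal Alertmanager routing sketch that pages only on the critical ones (receiver names are assumptions):
{{< code lang="yaml" >}}
# Illustrative Alertmanager route; receiver names are assumptions.
route:
  receiver: team-default
  group_by: ['alertname', 'instance']
  routes:
    - matchers:
        - severity = critical
      receiver: team-pager
receivers:
  - name: team-default
  - name: team-pager
{{< /code >}}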
## Dashboards
The following dashboards are generated from mixins and hosted on GitHub:
- [prometheus-remote-write](https://github.com/monitoring-mixins/website/blob/master/assets/prometheus/dashboards/prometheus-remote-write.json)
- [prometheus](https://github.com/monitoring-mixins/website/blob/master/assets/prometheus/dashboards/prometheus.json)