mirror of
https://github.com/monitoring-mixins/website.git
synced 2024-12-14 11:37:31 +00:00
assets,site/content: daily assets regeneration
This commit is contained in:
parent
018f973028
commit
11f6cd77f3
13 changed files with 3091 additions and 83 deletions
|
@ -1033,7 +1033,7 @@
|
|||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 1,
|
||||
"decimals": 0,
|
||||
"noValue": "All healthy",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
|
@ -1052,7 +1052,7 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
|
|
|
@ -231,7 +231,7 @@ groups:
|
|||
<
|
||||
kube_hpa_spec_max_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
changes(kube_hpa_status_current_replicas[15m]) == 0
|
||||
changes(kube_hpa_status_current_replicas{job="kube-state-metrics"}[15m]) == 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -272,7 +272,7 @@ groups:
|
|||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
|
||||
summary: Cluster has overcommitted memory resource requests.
|
||||
expr: |
|
||||
sum(namespace_memory:kube_pod_container_resource_requests_bytes:sum{})
|
||||
sum(namespace_memory:kube_pod_container_resource_requests:sum{})
|
||||
/
|
||||
sum(kube_node_status_allocatable{resource="memory"})
|
||||
>
|
||||
|
|
|
@ -123,7 +123,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
|
||||
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -351,7 +351,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
|
||||
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -768,7 +768,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||
"expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"cpu\"})",
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -122,7 +122,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", resource=\"cpu\"})",
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -198,7 +198,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"memory\"})",
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -274,7 +274,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", resource=\"memory\"})",
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -606,7 +606,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||
"expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -615,7 +615,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -624,7 +624,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_limits{namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||
"expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -633,7 +633,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -1013,7 +1013,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||
"expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -1022,7 +1022,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -1031,7 +1031,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_limits{namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||
"expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
@ -1040,7 +1040,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
|
1766
assets/loki/dashboards/loki-reads-resources.json
Normal file
1766
assets/loki/dashboards/loki-reads-resources.json
Normal file
File diff suppressed because it is too large
Load diff
1240
assets/loki/dashboards/loki-writes-resources.json
Normal file
1240
assets/loki/dashboards/loki-writes-resources.json
Normal file
File diff suppressed because it is too large
Load diff
|
@ -45,7 +45,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(\n instance:node_cpu_utilisation:rate1m{job=\"node\"}\n*\n instance:node_num_cpu:sum{job=\"node\"}\n)\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node\"}))\n",
|
||||
"expr": "(\n instance:node_cpu_utilisation:rate5m{job=\"node\"}\n*\n instance:node_num_cpu:sum{job=\"node\"}\n)\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node\"}))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}",
|
||||
|
@ -285,7 +285,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance:node_vmstat_pgmajfault:rate1m{job=\"node\"}",
|
||||
"expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}",
|
||||
|
@ -383,7 +383,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node\"}",
|
||||
"expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} Receive",
|
||||
|
@ -391,7 +391,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node\"}",
|
||||
"expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} Transmit",
|
||||
|
@ -477,7 +477,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node\"}",
|
||||
"expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} Receive",
|
||||
|
@ -485,7 +485,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node\"}",
|
||||
"expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} Transmit",
|
||||
|
@ -573,7 +573,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance_device:node_disk_io_time_seconds:rate1m{job=\"node\"}\n/ scalar(count(instance_device:node_disk_io_time_seconds:rate1m{job=\"node\"}))\n",
|
||||
"expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node\"}\n/ scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node\"}))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} {{device}}",
|
||||
|
@ -649,7 +649,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node\"}\n/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node\"}))\n",
|
||||
"expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node\"}\n/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node\"}))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} {{device}}",
|
||||
|
|
|
@ -45,7 +45,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance:node_cpu_utilisation:rate1m{job=\"node\", instance=\"$instance\"}",
|
||||
"expr": "instance:node_cpu_utilisation:rate5m{job=\"node\", instance=\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Utilisation",
|
||||
|
@ -285,7 +285,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance:node_vmstat_pgmajfault:rate1m{job=\"node\", instance=\"$instance\"}",
|
||||
"expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node\", instance=\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Major page faults",
|
||||
|
@ -383,7 +383,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node\", instance=\"$instance\"}",
|
||||
"expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node\", instance=\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Receive",
|
||||
|
@ -391,7 +391,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node\", instance=\"$instance\"}",
|
||||
"expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node\", instance=\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Transmit",
|
||||
|
@ -477,7 +477,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node\", instance=\"$instance\"}",
|
||||
"expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node\", instance=\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Receive drops",
|
||||
|
@ -485,7 +485,7 @@
|
|||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node\", instance=\"$instance\"}",
|
||||
"expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node\", instance=\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Transmit drops",
|
||||
|
@ -573,7 +573,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance_device:node_disk_io_time_seconds:rate1m{job=\"node\", instance=\"$instance\"}",
|
||||
"expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node\", instance=\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{device}}",
|
||||
|
@ -649,7 +649,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node\", instance=\"$instance\"}",
|
||||
"expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node\", instance=\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{device}}",
|
||||
|
|
|
@ -56,7 +56,7 @@
|
|||
{
|
||||
"expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node\", mode=\"idle\", instance=\"$instance\"}[$__interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node\", mode=\"idle\", instance=\"$instance\"})\n)\n",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"interval": "$__rate_interval",
|
||||
"intervalFactor": 5,
|
||||
"legendFormat": "{{cpu}}",
|
||||
"refId": "A"
|
||||
|
@ -454,7 +454,7 @@
|
|||
{
|
||||
"expr": "rate(node_disk_read_bytes_total{job=\"node\", instance=\"$instance\", device!=\"\"}[$__interval])",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"interval": "$__rate_interval",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{device}} read",
|
||||
"refId": "A"
|
||||
|
@ -462,7 +462,7 @@
|
|||
{
|
||||
"expr": "rate(node_disk_written_bytes_total{job=\"node\", instance=\"$instance\", device!=\"\"}[$__interval])",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"interval": "$__rate_interval",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{device}} written",
|
||||
"refId": "B"
|
||||
|
@ -470,7 +470,7 @@
|
|||
{
|
||||
"expr": "rate(node_disk_io_time_seconds_total{job=\"node\", instance=\"$instance\", device!=\"\"}[$__interval])",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"interval": "$__rate_interval",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{device}} io time",
|
||||
"refId": "C"
|
||||
|
@ -662,7 +662,7 @@
|
|||
{
|
||||
"expr": "rate(node_network_receive_bytes_total{job=\"node\", instance=\"$instance\", device!=\"lo\"}[$__interval])",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"interval": "$__rate_interval",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{device}}",
|
||||
"refId": "A"
|
||||
|
@ -744,7 +744,7 @@
|
|||
{
|
||||
"expr": "rate(node_network_transmit_bytes_total{job=\"node\", instance=\"$instance\", device!=\"lo\"}[$__interval])",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"interval": "$__rate_interval",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{device}}",
|
||||
"refId": "A"
|
||||
|
|
|
@ -10,9 +10,9 @@ groups:
|
|||
record: instance:node_num_cpu:sum
|
||||
- expr: |
|
||||
1 - avg without (cpu, mode) (
|
||||
rate(node_cpu_seconds_total{job="node", mode="idle"}[1m])
|
||||
rate(node_cpu_seconds_total{job="node", mode="idle"}[5m])
|
||||
)
|
||||
record: instance:node_cpu_utilisation:rate1m
|
||||
record: instance:node_cpu_utilisation:rate5m
|
||||
- expr: |
|
||||
(
|
||||
node_load1{job="node"}
|
||||
|
@ -28,31 +28,31 @@ groups:
|
|||
)
|
||||
record: instance:node_memory_utilisation:ratio
|
||||
- expr: |
|
||||
rate(node_vmstat_pgmajfault{job="node"}[1m])
|
||||
record: instance:node_vmstat_pgmajfault:rate1m
|
||||
rate(node_vmstat_pgmajfault{job="node"}[5m])
|
||||
record: instance:node_vmstat_pgmajfault:rate5m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_seconds_total{job="node", device!=""}[1m])
|
||||
record: instance_device:node_disk_io_time_seconds:rate1m
|
||||
rate(node_disk_io_time_seconds_total{job="node", device!=""}[5m])
|
||||
record: instance_device:node_disk_io_time_seconds:rate5m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node", device!=""}[1m])
|
||||
record: instance_device:node_disk_io_time_weighted_seconds:rate1m
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node", device!=""}[5m])
|
||||
record: instance_device:node_disk_io_time_weighted_seconds:rate5m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_bytes_total{job="node", device!="lo"}[1m])
|
||||
rate(node_network_receive_bytes_total{job="node", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_receive_bytes_excluding_lo:rate1m
|
||||
record: instance:node_network_receive_bytes_excluding_lo:rate5m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_bytes_total{job="node", device!="lo"}[1m])
|
||||
rate(node_network_transmit_bytes_total{job="node", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_transmit_bytes_excluding_lo:rate1m
|
||||
record: instance:node_network_transmit_bytes_excluding_lo:rate5m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_drop_total{job="node", device!="lo"}[1m])
|
||||
rate(node_network_receive_drop_total{job="node", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_receive_drop_excluding_lo:rate1m
|
||||
record: instance:node_network_receive_drop_excluding_lo:rate5m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_drop_total{job="node", device!="lo"}[1m])
|
||||
rate(node_network_transmit_drop_total{job="node", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_transmit_drop_excluding_lo:rate1m
|
||||
record: instance:node_network_transmit_drop_excluding_lo:rate5m
|
||||
|
|
|
@ -331,7 +331,7 @@ expr: |
|
|||
<
|
||||
kube_hpa_spec_max_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
changes(kube_hpa_status_current_replicas[15m]) == 0
|
||||
changes(kube_hpa_status_current_replicas{job="kube-state-metrics"}[15m]) == 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
|
@ -390,7 +390,7 @@ annotations:
|
|||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
|
||||
summary: Cluster has overcommitted memory resource requests.
|
||||
expr: |
|
||||
sum(namespace_memory:kube_pod_container_resource_requests_bytes:sum{})
|
||||
sum(namespace_memory:kube_pod_container_resource_requests:sum{})
|
||||
/
|
||||
sum(kube_node_status_allocatable{resource="memory"})
|
||||
>
|
||||
|
|
|
@ -213,5 +213,7 @@ Following dashboards are generated from mixins and hosted on github:
|
|||
- [loki-chunks](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-chunks.json)
|
||||
- [loki-logs](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-logs.json)
|
||||
- [loki-operational](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-operational.json)
|
||||
- [loki-reads-resources](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-reads-resources.json)
|
||||
- [loki-reads](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-reads.json)
|
||||
- [loki-writes-resources](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-writes-resources.json)
|
||||
- [loki-writes](https://github.com/monitoring-mixins/website/blob/master/assets/loki/dashboards/loki-writes.json)
|
||||
|
|
|
@ -326,14 +326,14 @@ expr: |
|
|||
record: instance:node_num_cpu:sum
|
||||
{{< /code >}}
|
||||
|
||||
##### instance:node_cpu_utilisation:rate1m
|
||||
##### instance:node_cpu_utilisation:rate5m
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
expr: |
|
||||
1 - avg without (cpu, mode) (
|
||||
rate(node_cpu_seconds_total{job="node", mode="idle"}[1m])
|
||||
rate(node_cpu_seconds_total{job="node", mode="idle"}[5m])
|
||||
)
|
||||
record: instance:node_cpu_utilisation:rate1m
|
||||
record: instance:node_cpu_utilisation:rate5m
|
||||
{{< /code >}}
|
||||
|
||||
##### instance:node_load1_per_cpu:ratio
|
||||
|
@ -360,68 +360,68 @@ expr: |
|
|||
record: instance:node_memory_utilisation:ratio
|
||||
{{< /code >}}
|
||||
|
||||
##### instance:node_vmstat_pgmajfault:rate1m
|
||||
##### instance:node_vmstat_pgmajfault:rate5m
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
expr: |
|
||||
rate(node_vmstat_pgmajfault{job="node"}[1m])
|
||||
record: instance:node_vmstat_pgmajfault:rate1m
|
||||
rate(node_vmstat_pgmajfault{job="node"}[5m])
|
||||
record: instance:node_vmstat_pgmajfault:rate5m
|
||||
{{< /code >}}
|
||||
|
||||
##### instance_device:node_disk_io_time_seconds:rate1m
|
||||
##### instance_device:node_disk_io_time_seconds:rate5m
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
expr: |
|
||||
rate(node_disk_io_time_seconds_total{job="node", device!=""}[1m])
|
||||
record: instance_device:node_disk_io_time_seconds:rate1m
|
||||
rate(node_disk_io_time_seconds_total{job="node", device!=""}[5m])
|
||||
record: instance_device:node_disk_io_time_seconds:rate5m
|
||||
{{< /code >}}
|
||||
|
||||
##### instance_device:node_disk_io_time_weighted_seconds:rate1m
|
||||
##### instance_device:node_disk_io_time_weighted_seconds:rate5m
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
expr: |
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node", device!=""}[1m])
|
||||
record: instance_device:node_disk_io_time_weighted_seconds:rate1m
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node", device!=""}[5m])
|
||||
record: instance_device:node_disk_io_time_weighted_seconds:rate5m
|
||||
{{< /code >}}
|
||||
|
||||
##### instance:node_network_receive_bytes_excluding_lo:rate1m
|
||||
##### instance:node_network_receive_bytes_excluding_lo:rate5m
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_bytes_total{job="node", device!="lo"}[1m])
|
||||
rate(node_network_receive_bytes_total{job="node", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_receive_bytes_excluding_lo:rate1m
|
||||
record: instance:node_network_receive_bytes_excluding_lo:rate5m
|
||||
{{< /code >}}
|
||||
|
||||
##### instance:node_network_transmit_bytes_excluding_lo:rate1m
|
||||
##### instance:node_network_transmit_bytes_excluding_lo:rate5m
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_bytes_total{job="node", device!="lo"}[1m])
|
||||
rate(node_network_transmit_bytes_total{job="node", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_transmit_bytes_excluding_lo:rate1m
|
||||
record: instance:node_network_transmit_bytes_excluding_lo:rate5m
|
||||
{{< /code >}}
|
||||
|
||||
##### instance:node_network_receive_drop_excluding_lo:rate1m
|
||||
##### instance:node_network_receive_drop_excluding_lo:rate5m
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_drop_total{job="node", device!="lo"}[1m])
|
||||
rate(node_network_receive_drop_total{job="node", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_receive_drop_excluding_lo:rate1m
|
||||
record: instance:node_network_receive_drop_excluding_lo:rate5m
|
||||
{{< /code >}}
|
||||
|
||||
##### instance:node_network_transmit_drop_excluding_lo:rate1m
|
||||
##### instance:node_network_transmit_drop_excluding_lo:rate5m
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_drop_total{job="node", device!="lo"}[1m])
|
||||
rate(node_network_transmit_drop_total{job="node", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_transmit_drop_excluding_lo:rate1m
|
||||
record: instance:node_network_transmit_drop_excluding_lo:rate5m
|
||||
{{< /code >}}
|
||||
|
||||
## Dashboards
|
||||
|
|
Loading…
Reference in a new issue