diff --git a/assets/kubernetes/alerts.yaml b/assets/kubernetes/alerts.yaml index d138a77..c2aa960 100644 --- a/assets/kubernetes/alerts.yaml +++ b/assets/kubernetes/alerts.yaml @@ -261,7 +261,7 @@ groups: / sum(kube_node_status_allocatable{resource="cpu"}) > - (count(kube_node_status_allocatable{resource="cpu"}) -1) / count(kube_node_status_allocatable{resource="cpu"}) + ((count(kube_node_status_allocatable{resource="cpu"}) > 1) - 1) / count(kube_node_status_allocatable{resource="cpu"}) for: 5m labels: severity: warning @@ -276,7 +276,7 @@ groups: / sum(kube_node_status_allocatable{resource="memory"}) > - (count(kube_node_status_allocatable{resource="memory"})-1) + ((count(kube_node_status_allocatable{resource="memory"}) > 1) - 1) / count(kube_node_status_allocatable{resource="memory"}) for: 5m @@ -287,8 +287,11 @@ groups: description: Cluster has overcommitted CPU resource requests for Namespaces. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit summary: Cluster has overcommitted CPU resource requests. - expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n - \ /\nsum(kube_node_status_allocatable{resource=\"cpu\"}) \n > 1.5\n" + expr: | + sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) + / + sum(kube_node_status_allocatable{resource="cpu"}) + > 1.5 for: 5m labels: severity: warning diff --git a/assets/loki/dashboards/loki-operational.json b/assets/loki/dashboards/loki-operational.json index a47e38c..49b625a 100644 --- a/assets/loki/dashboards/loki-operational.json +++ b/assets/loki/dashboards/loki-operational.json @@ -6678,7 +6678,7 @@ "multi": false, "name": "cluster", "options": [ ], - "query": "label_values(kube_pod_container_info, cluster)", + "query": "label_values(kube_pod_container_info{image=~\".*loki.*\", container!=\"loki-canary\"}, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ -6698,7 +6698,7 @@ "multi": false, "name": "namespace", "options": [ ], - "query": "label_values(kube_pod_container_info{cluster=\"$cluster\"}, namespace)", + "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", image=~\".*loki.*\", container!=\"loki-canary\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, diff --git a/site/content/kubernetes/_index.md b/site/content/kubernetes/_index.md index 7ef2756..90ade95 100644 --- a/site/content/kubernetes/_index.md +++ b/site/content/kubernetes/_index.md @@ -373,7 +373,7 @@ expr: | / sum(kube_node_status_allocatable{resource="cpu"}) > - (count(kube_node_status_allocatable{resource="cpu"}) -1) / count(kube_node_status_allocatable{resource="cpu"}) + ((count(kube_node_status_allocatable{resource="cpu"}) > 1) - 1) / count(kube_node_status_allocatable{resource="cpu"}) for: 5m labels: severity: warning @@ -394,7 +394,7 @@ expr: | / sum(kube_node_status_allocatable{resource="memory"}) > - (count(kube_node_status_allocatable{resource="memory"})-1) + ((count(kube_node_status_allocatable{resource="memory"}) > 1) - 1) / count(kube_node_status_allocatable{resource="memory"}) for: 5m @@ -411,12 +411,11 @@ annotations: description: Cluster has overcommitted CPU resource requests for Namespaces. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit summary: Cluster has overcommitted CPU resource requests. -expr: "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"}) - - \ / -sum(kube_node_status_allocatable{resource=\"cpu\"}) - > 1.5 -" +expr: | + sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) + / + sum(kube_node_status_allocatable{resource="cpu"}) + > 1.5 for: 5m labels: severity: warning