mirror of
https://github.com/monitoring-mixins/website.git
synced 2024-12-14 11:37:31 +00:00
a672deef1c
https://github.com/danihodovic/celery-exporter/tree/master/celery-mixin https://github.com/adinhodovic/argo-cd-mixin https://github.com/adinhodovic/kubernetes-autoscaling-mixin https://github.com/adinhodovic/django-mixin https://github.com/adinhodovic/ingress-nginx-mixin
73 lines
2.7 KiB
YAML
73 lines
2.7 KiB
YAML
# Prometheus alerting rules for Kubernetes autoscaling components.
groups:
# Karpenter: cloud-provider error and node pool capacity alerts.
- name: karpenter
  rules:
  # Fires when the summed 5-minute increase of the cloud-provider error
  # counter is above zero for a (namespace, job, provider, controller, method)
  # combination, and the condition has held for 5 minutes.
  - alert: KarpenterCloudProviderErrors
    annotations:
      dashboard_url: https://grafana.com/d/kubernetes-autoscaling-mixin-kperf-jkwq/kubernetes-autoscaling-karpenter-performance
      # Folded scalar: the wrapped lines are joined with single spaces.
      description: >-
        The Karpenter provider {{ $labels.provider }} with the controller
        {{ $labels.controller }} has errors with the method {{ $labels.method }}.
      summary: Karpenter has Cloud Provider Errors.
    expr: |
      sum(
        increase(
          karpenter_cloudprovider_errors_total{
            job=~"karpenter"
          }[5m]
        )
      ) by (namespace, job, provider, controller, method) > 0
    for: 5m
    labels:
      severity: warning
  # Fires when node pool usage divided by the node pool limit, expressed as a
  # percentage per (namespace, job, nodepool, resource_type), stays above 75
  # for 15 minutes.
  - alert: KarpenterNodepoolNearCapacity
    annotations:
      dashboard_url: https://grafana.com/d/kubernetes-autoscaling-mixin-kover-jkwq/kubernetes-autoscaling-karpenter-overview
      # Folded scalar: the wrapped lines are joined with single spaces.
      description: >-
        The resource {{ $labels.resource_type }} in the Karpenter node
        pool {{ $labels.nodepool }} is nearing its limit. Consider scaling or adding
        resources.
      summary: Karpenter Nodepool near capacity.
    # `/` and `*` bind tighter than `>`, so the comparison applies to
    # (usage / limit) * 100.
    expr: |
      sum (
        karpenter_nodepools_usage{job=~"karpenter"}
      ) by (namespace, job, nodepool, resource_type)
      /
      sum (
        karpenter_nodepools_limit{job=~"karpenter"}
      ) by (namespace, job, nodepool, resource_type)
      * 100 > 75
    for: 15m
    labels:
      severity: warning
# Cluster Autoscaler: node count capacity and unschedulable pod alerts.
- name: cluster-autoscaler
  rules:
  # Fires when the node count divided by the maximum node count, expressed as
  # a percentage per (namespace, job), stays above 75 for 15 minutes.
  - alert: ClusterAutoscalerNodeCountNearCapacity
    annotations:
      dashboard_url: https://grafana.com/d/kubernetes-autoscaling-mixin-ca-jkwq/kubernetes-autoscaling-cluster-autoscaler
      # Folded scalar: the wrapped lines are joined with single spaces.
      description: >-
        The node count for the cluster autoscaler job {{ $labels.job }}
        is reaching max limit. Consider scaling node groups.
      summary: Cluster Autoscaler Node Count near Capacity.
    # `/` and `*` bind tighter than `>`, so the comparison applies to
    # (nodes / max nodes) * 100.
    expr: |
      sum (
        cluster_autoscaler_nodes_count{job=~"cluster-autoscaler"}
      ) by (namespace, job)
      /
      sum (
        cluster_autoscaler_max_nodes_count{job=~"cluster-autoscaler"}
      ) by (namespace, job)
      * 100 > 75
    for: 15m
    labels:
      severity: warning
  # Fires when the summed unschedulable pod count per (namespace, job) stays
  # above zero for 15 minutes.
  - alert: ClusterAutoscalerUnschedulablePods
    annotations:
      dashboard_url: https://grafana.com/d/kubernetes-autoscaling-mixin-ca-jkwq/kubernetes-autoscaling-cluster-autoscaler
      # Folded scalar: the wrapped lines are joined with single spaces.
      description: >-
        The cluster currently has unschedulable pods, indicating resource
        shortages. Consider adding more nodes or increasing node group capacity.
      summary: Pods Pending Scheduling - Cluster Node Group Scaling Required
    expr: |
      sum (
        cluster_autoscaler_unschedulable_pods_count{job=~"cluster-autoscaler"}
      ) by (namespace, job)
      > 0
    for: 15m
    labels:
      severity: warning