1
0
Fork 0
mirror of https://github.com/monitoring-mixins/website.git synced 2024-12-14 11:37:31 +00:00

Merge pull request #30 from imusmanmalik/feat/add-update-cert-manager-mixin

feat: Add updated cert-manager mixin
This commit is contained in:
Paweł Krupa 2023-10-30 09:43:57 +01:00 committed by GitHub
commit 085c6a9e67
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 98 additions and 40 deletions

View file

@ -5,8 +5,8 @@ groups:
annotations: annotations:
description: New certificates will not be able to be minted, and existing ones description: New certificates will not be able to be minted, and existing ones
can't be renewed until cert-manager is back. can't be renewed until cert-manager is back.
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagerabsent
summary: Cert Manager has dissapeared from Prometheus service discovery. summary: Cert Manager has disappeared from Prometheus service discovery.
expr: absent(up{job="cert-manager"}) expr: absent(up{job="cert-manager"})
for: 10m for: 10m
labels: labels:
@ -19,7 +19,7 @@ groups:
description: The domain that this cert covers will be unavailable after {{ $value description: The domain that this cert covers will be unavailable after {{ $value
| humanizeDuration }}. Clients using endpoints that this cert protects will | humanizeDuration }}. Clients using endpoints that this cert protects will
start to fail in {{ $value | humanizeDuration }}. start to fail in {{ $value | humanizeDuration }}.
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagercertexpirysoon
summary: The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from summary: The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from
expiry, it should have renewed over a week ago. expiry, it should have renewed over a week ago.
expr: | expr: |
@ -35,7 +35,7 @@ groups:
description: This certificate has not been ready to serve traffic for at least description: This certificate has not been ready to serve traffic for at least
10m. If the cert is being renewed or there is another valid cert, the ingress 10m. If the cert is being renewed or there is another valid cert, the ingress
controller _may_ be able to serve that instead. controller _may_ be able to serve that instead.
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagercertnotready
summary: The cert `{{ $labels.name }}` is not ready to serve traffic. summary: The cert `{{ $labels.name }}` is not ready to serve traffic.
expr: | expr: |
max by (name, exported_namespace, namespace, condition) ( max by (name, exported_namespace, namespace, condition) (
@ -49,7 +49,7 @@ groups:
dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager
description: Depending on the rate limit, cert-manager may be unable to generate description: Depending on the rate limit, cert-manager may be unable to generate
certificates for up to a week. certificates for up to a week.
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagerhittingratelimits
summary: Cert manager hitting LetsEncrypt rate limits. summary: Cert manager hitting LetsEncrypt rate limits.
expr: | expr: |
sum by (host) ( sum by (host) (

View file

@ -1,4 +1,33 @@
{ {
"_config": {
"certManagerCertExpiryDays": "21",
"certManagerJobLabel": "cert-manager",
"certManagerRunbookURLPattern": "https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#%s",
"dashboards": {
"certmanagerCertificateExpirationTimestampSecondsSelector": "",
"certmanagerCertificateReadyStatusSelector": "",
"certmanagerControllerSyncCallCountSelector": "",
"certmanagerHttpAcmeClientRequestCountSelector": "",
"certmanagerHttpAcmeClientRequestDurationSecondsCountSelector": "",
"certmanagerHttpAcmeClientRequestDurationSecondsSumSelector": "",
"clusterVariableSelector": "",
"containerCPUUsageSecondsTotalSelector": "container=\"cert-manager\"",
"containerCpuCfsPeriodsTotalSelector": "container=\"cert-manager\"",
"containerCpuCfsThrottledPeriodsTotalSelector": "container=\"cert-manager\"",
"containerMemoryUsageBytesSelector": "container=\"cert-manager\"",
"containerNetworkReceiveBytesTotalSelector": "namespace=\"cert-manager\"",
"containerNetworkTransmitBytesTotalSelector": "namespace=\"cert-manager\"",
"containerSelector": "container=\"cert-manager\"",
"defaultSelector": "",
"enableMultiCluster": false,
"kubePodContainerResourceLimitsCpuCoresSelector": "container=\"cert-manager\"",
"kubePodContainerResourceLimitsMemoryBytesSelector": "container=\"cert-manager\"",
"kubePodContainerResourceRequestsCpuCoresSelector": "container=\"cert-manager\"",
"kubePodContainerResourceRequestsMemoryBytesSelector": "container=\"cert-manager\"",
"namespaceSelector": "namespace=\"cert-manager\""
},
"grafanaExternalUrl": "https://grafana.example.com"
},
"annotations": { "annotations": {
"list": [ "list": [
{ {
@ -89,9 +118,9 @@
"pluginVersion": "7.4.5", "pluginVersion": "7.4.5",
"targets": [ "targets": [
{ {
"expr": "sum by (condition) (certmanager_certificate_ready_status)", "expr": "sum by (condition) (certmanager_certificate_ready_status{ })",
"interval": "", "interval": "",
"legendFormat": "{{condition}}", "legendFormat": "{ {condition } }",
"refId": "A" "refId": "A"
} }
], ],
@ -153,7 +182,7 @@
"pluginVersion": "7.4.5", "pluginVersion": "7.4.5",
"targets": [ "targets": [
{ {
"expr": "min(certmanager_certificate_expiration_timestamp_seconds > 0) - time()", "expr": "min(certmanager_certificate_expiration_timestamp_seconds{ } > 0) - time()",
"hide": false, "hide": false,
"instant": true, "instant": true,
"interval": "", "interval": "",
@ -267,7 +296,7 @@
"pluginVersion": "7.4.5", "pluginVersion": "7.4.5",
"targets": [ "targets": [
{ {
"expr": "label_join(avg by (name, namespace, condition, exported_namespace) (certmanager_certificate_ready_status == 1), \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")", "expr": "label_join(avg by (name, namespace, condition, exported_namespace) (certmanager_certificate_ready_status{ } == 1), \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")",
"format": "table", "format": "table",
"instant": true, "instant": true,
"interval": "", "interval": "",
@ -275,7 +304,7 @@
"refId": "A" "refId": "A"
}, },
{ {
"expr": "label_join(avg by (name, namespace, exported_namespace) (certmanager_certificate_expiration_timestamp_seconds) * 1000, \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")", "expr": "label_join(avg by (name, namespace, exported_namespace) (certmanager_certificate_expiration_timestamp_seconds{ }) * 1000, \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")",
"format": "table", "format": "table",
"instant": true, "instant": true,
"interval": "", "interval": "",
@ -392,9 +421,9 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum by (controller) (\n rate(certmanager_controller_sync_call_count[$__rate_interval])\n)", "expr": "sum by (controller) (\n rate(certmanager_controller_sync_call_count{ }[$__rate_interval ])\n)",
"interval": "", "interval": "",
"legendFormat": "{{controller}}", "legendFormat": "{ {controller } }",
"refId": "A" "refId": "A"
} }
], ],
@ -493,9 +522,9 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum by (method, path, status) (\n rate(certmanager_http_acme_client_request_count[$__rate_interval])\n)", "expr": "sum by (method, path, status) (\n rate(certmanager_http_acme_client_request_count{ }[$__rate_interval ])\n)",
"interval": "", "interval": "",
"legendFormat": "{{method}} {{path}} {{status}}", "legendFormat": "{ {method } } { {path } } { {status } }",
"refId": "A" "refId": "A"
} }
], ],
@ -594,9 +623,9 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_sum[$__rate_interval]))\n/\nsum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_count[$__rate_interval]))", "expr": "sum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_sum{ }[$__rate_interval ]))\n/\nsum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_count{ }[$__rate_interval ]))",
"interval": "", "interval": "",
"legendFormat": "{{method}} {{path}} {{status}}", "legendFormat": "{ {method } } { {path } } { {status } }",
"refId": "A" "refId": "A"
} }
], ],
@ -712,30 +741,30 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "avg by (pod) (rate(container_cpu_usage_seconds_total{container=\"cert-manager\"}[$__rate_interval]))", "expr": "avg by (pod) (rate(container_cpu_usage_seconds_total{container=\"cert-manager\" }[$__rate_interval ]))",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "CPU {{pod}}", "legendFormat": "CPU { {pod } }",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "avg by (pod) (kube_pod_container_resource_limits_cpu_cores{container=\"cert-manager\"})", "expr": "avg by (pod) (kube_pod_container_resource_limits_cpu_cores{container=\"cert-manager\" })",
"format": "time_series", "format": "time_series",
"hide": true, "hide": true,
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "Limit {{pod}}", "legendFormat": "Limit { {pod } }",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "avg by (pod) (kube_pod_container_resource_requests_cpu_cores{container=\"cert-manager\"})", "expr": "avg by (pod) (kube_pod_container_resource_requests_cpu_cores{container=\"cert-manager\" })",
"format": "time_series", "format": "time_series",
"hide": true, "hide": true,
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "Request {{pod}}", "legendFormat": "Request { {pod } }",
"refId": "C" "refId": "C"
} }
], ],
@ -841,12 +870,12 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "avg by (pod) (\n rate(container_cpu_cfs_throttled_periods_total{container=\"cert-manager\"}[$__rate_interval])\n /\n rate(container_cpu_cfs_periods_total{container=\"cert-manager\"}[$__rate_interval])\n)", "expr": "avg by (pod) (\n rate(container_cpu_cfs_throttled_periods_total{container=\"cert-manager\" }[$__rate_interval ])\n /\n rate(container_cpu_cfs_periods_total{container=\"cert-manager\" }[$__rate_interval ])\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{pod}}", "legendFormat": "{ {pod } }",
"refId": "A" "refId": "A"
} }
], ],
@ -962,28 +991,28 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "avg by (pod) (container_memory_usage_bytes{container=\"cert-manager\"})", "expr": "avg by (pod) (container_memory_usage_bytes{container=\"cert-manager\" })",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "Memory {{pod}}", "legendFormat": "Memory { {pod } }",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "avg by (pod) (kube_pod_container_resource_limits_memory_bytes{container=\"cert-manager\"})", "expr": "avg by (pod) (kube_pod_container_resource_limits_memory_bytes{container=\"cert-manager\" })",
"format": "time_series", "format": "time_series",
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "Limit {{pod}}", "legendFormat": "Limit { {pod } }",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "avg by (pod) (kube_pod_container_resource_requests_memory_bytes{container=\"cert-manager\"})", "expr": "avg by (pod) (kube_pod_container_resource_requests_memory_bytes{container=\"cert-manager\" })",
"format": "time_series", "format": "time_series",
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "Request {{pod}}", "legendFormat": "Request { {pod } }",
"refId": "C" "refId": "C"
} }
], ],
@ -1087,7 +1116,7 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "avg(\n sum without (interface) (\n rate(container_network_receive_bytes_total{namespace=\"cert-manager\"}[$__rate_interval])\n )\n)", "expr": "avg(\n sum without (interface) (\n rate(container_network_receive_bytes_total{namespace=\"cert-manager\" }[$__rate_interval ])\n )\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
@ -1096,7 +1125,7 @@
"refId": "A" "refId": "A"
}, },
{ {
"expr": "avg(\n sum without (interface) (\n rate(container_network_transmit_bytes_total{namespace=\"cert-manager\"}[$__rate_interval])\n )\n)", "expr": "avg(\n sum without (interface) (\n rate(container_network_transmit_bytes_total{namespace=\"cert-manager\" }[$__rate_interval ])\n )\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
@ -1176,6 +1205,35 @@
"regex": "", "regex": "",
"skipUrlSync": false, "skipUrlSync": false,
"type": "datasource" "type": "datasource"
},
{
"current": {
"selected": false,
"text": "",
"value": ""
},
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"definition": "",
"hide": 2,
"includeAll": false,
"multi": false,
"name": "cluster",
"options": [ ],
"query": {
"query": "label_values(certmanager_certificate_ready_status{ }, cluster)",
"refId": "Prometheus-cluster-Variable-Query"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
} }
] ]
}, },

View file

@ -97,7 +97,7 @@
}, },
{ {
"name": "cert-manager", "name": "cert-manager",
"source": "https://gitlab.com/uneeq-oss/cert-manager-mixin.git", "source": "https://github.com/imusmanmalik/cert-manager-mixin.git",
"subdir": "" "subdir": ""
}, },
{ {

View file

@ -7,7 +7,7 @@ title: cert-manager
{{< panel style="danger" >}} {{< panel style="danger" >}}
Jsonnet source code is available at [gitlab.com/uneeq-oss/cert-manager-mixin.git](https://gitlab.com/uneeq-oss/cert-manager-mixin.git) Jsonnet source code is available at [github.com/imusmanmalik/cert-manager-mixin.git](https://github.com/imusmanmalik/cert-manager-mixin.git)
{{< /panel >}} {{< /panel >}}
## Alerts ## Alerts
@ -25,8 +25,8 @@ alert: CertManagerAbsent
annotations: annotations:
description: New certificates will not be able to be minted, and existing ones can't description: New certificates will not be able to be minted, and existing ones can't
be renewed until cert-manager is back. be renewed until cert-manager is back.
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagerabsent
summary: Cert Manager has dissapeared from Prometheus service discovery. summary: Cert Manager has disappeared from Prometheus service discovery.
expr: absent(up{job="cert-manager"}) expr: absent(up{job="cert-manager"})
for: 10m for: 10m
labels: labels:
@ -44,7 +44,7 @@ annotations:
description: The domain that this cert covers will be unavailable after {{ $value description: The domain that this cert covers will be unavailable after {{ $value
| humanizeDuration }}. Clients using endpoints that this cert protects will start | humanizeDuration }}. Clients using endpoints that this cert protects will start
to fail in {{ $value | humanizeDuration }}. to fail in {{ $value | humanizeDuration }}.
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagercertexpirysoon
summary: The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from expiry, summary: The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from expiry,
it should have renewed over a week ago. it should have renewed over a week ago.
expr: | expr: |
@ -65,7 +65,7 @@ annotations:
description: This certificate has not been ready to serve traffic for at least 10m. description: This certificate has not been ready to serve traffic for at least 10m.
If the cert is being renewed or there is another valid cert, the ingress controller If the cert is being renewed or there is another valid cert, the ingress controller
_may_ be able to serve that instead. _may_ be able to serve that instead.
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagercertnotready
summary: The cert `{{ $labels.name }}` is not ready to serve traffic. summary: The cert `{{ $labels.name }}` is not ready to serve traffic.
expr: | expr: |
max by (name, exported_namespace, namespace, condition) ( max by (name, exported_namespace, namespace, condition) (
@ -84,7 +84,7 @@ annotations:
dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager
description: Depending on the rate limit, cert-manager may be unable to generate description: Depending on the rate limit, cert-manager may be unable to generate
certificates for up to a week. certificates for up to a week.
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagerhittingratelimits
summary: Cert manager hitting LetsEncrypt rate limits. summary: Cert manager hitting LetsEncrypt rate limits.
expr: | expr: |
sum by (host) ( sum by (host) (

View file

@ -97,7 +97,7 @@
}, },
{ {
"name": "cert-manager", "name": "cert-manager",
"source": "https://gitlab.com/uneeq-oss/cert-manager-mixin.git", "source": "https://github.com/imusmanmalik/cert-manager-mixin.git",
"subdir": "" "subdir": ""
}, },
{ {