mirror of
https://github.com/monitoring-mixins/website.git
synced 2024-12-14 11:37:31 +00:00
Merge pull request #30 from imusmanmalik/feat/add-update-cert-manager-mixin
feat: Add updated cert-manager mixin
This commit is contained in:
commit
085c6a9e67
5 changed files with 98 additions and 40 deletions
|
@ -5,8 +5,8 @@ groups:
|
|||
annotations:
|
||||
description: New certificates will not be able to be minted, and existing ones
|
||||
can't be renewed until cert-manager is back.
|
||||
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent
|
||||
summary: Cert Manager has dissapeared from Prometheus service discovery.
|
||||
runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagerabsent
|
||||
summary: Cert Manager has disappeared from Prometheus service discovery.
|
||||
expr: absent(up{job="cert-manager"})
|
||||
for: 10m
|
||||
labels:
|
||||
|
@ -19,7 +19,7 @@ groups:
|
|||
description: The domain that this cert covers will be unavailable after {{ $value
|
||||
| humanizeDuration }}. Clients using endpoints that this cert protects will
|
||||
start to fail in {{ $value | humanizeDuration }}.
|
||||
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon
|
||||
runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagercertexpirysoon
|
||||
summary: The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from
|
||||
expiry, it should have renewed over a week ago.
|
||||
expr: |
|
||||
|
@ -35,7 +35,7 @@ groups:
|
|||
description: This certificate has not been ready to serve traffic for at least
|
||||
10m. If the cert is being renewed or there is another valid cert, the ingress
|
||||
controller _may_ be able to serve that instead.
|
||||
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready
|
||||
runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagercertnotready
|
||||
summary: The cert `{{ $labels.name }}` is not ready to serve traffic.
|
||||
expr: |
|
||||
max by (name, exported_namespace, namespace, condition) (
|
||||
|
@ -49,7 +49,7 @@ groups:
|
|||
dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager
|
||||
description: Depending on the rate limit, cert-manager may be unable to generate
|
||||
certificates for up to a week.
|
||||
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits
|
||||
runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagerhittingratelimits
|
||||
summary: Cert manager hitting LetsEncrypt rate limits.
|
||||
expr: |
|
||||
sum by (host) (
|
||||
|
|
|
@ -1,4 +1,33 @@
|
|||
{
|
||||
"_config": {
|
||||
"certManagerCertExpiryDays": "21",
|
||||
"certManagerJobLabel": "cert-manager",
|
||||
"certManagerRunbookURLPattern": "https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#%s",
|
||||
"dashboards": {
|
||||
"certmanagerCertificateExpirationTimestampSecondsSelector": "",
|
||||
"certmanagerCertificateReadyStatusSelector": "",
|
||||
"certmanagerControllerSyncCallCountSelector": "",
|
||||
"certmanagerHttpAcmeClientRequestCountSelector": "",
|
||||
"certmanagerHttpAcmeClientRequestDurationSecondsCountSelector": "",
|
||||
"certmanagerHttpAcmeClientRequestDurationSecondsSumSelector": "",
|
||||
"clusterVariableSelector": "",
|
||||
"containerCPUUsageSecondsTotalSelector": "container=\"cert-manager\"",
|
||||
"containerCpuCfsPeriodsTotalSelector": "container=\"cert-manager\"",
|
||||
"containerCpuCfsThrottledPeriodsTotalSelector": "container=\"cert-manager\"",
|
||||
"containerMemoryUsageBytesSelector": "container=\"cert-manager\"",
|
||||
"containerNetworkReceiveBytesTotalSelector": "namespace=\"cert-manager\"",
|
||||
"containerNetworkTransmitBytesTotalSelector": "namespace=\"cert-manager\"",
|
||||
"containerSelector": "container=\"cert-manager\"",
|
||||
"defaultSelector": "",
|
||||
"enableMultiCluster": false,
|
||||
"kubePodContainerResourceLimitsCpuCoresSelector": "container=\"cert-manager\"",
|
||||
"kubePodContainerResourceLimitsMemoryBytesSelector": "container=\"cert-manager\"",
|
||||
"kubePodContainerResourceRequestsCpuCoresSelector": "container=\"cert-manager\"",
|
||||
"kubePodContainerResourceRequestsMemoryBytesSelector": "container=\"cert-manager\"",
|
||||
"namespaceSelector": "namespace=\"cert-manager\""
|
||||
},
|
||||
"grafanaExternalUrl": "https://grafana.example.com"
|
||||
},
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
|
@ -89,9 +118,9 @@
|
|||
"pluginVersion": "7.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (condition) (certmanager_certificate_ready_status)",
|
||||
"expr": "sum by (condition) (certmanager_certificate_ready_status{ })",
|
||||
"interval": "",
|
||||
"legendFormat": "{{condition}}",
|
||||
"legendFormat": "{ {condition } }",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
@ -153,7 +182,7 @@
|
|||
"pluginVersion": "7.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "min(certmanager_certificate_expiration_timestamp_seconds > 0) - time()",
|
||||
"expr": "min(certmanager_certificate_expiration_timestamp_seconds{ } > 0) - time()",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
|
@ -267,7 +296,7 @@
|
|||
"pluginVersion": "7.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_join(avg by (name, namespace, condition, exported_namespace) (certmanager_certificate_ready_status == 1), \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")",
|
||||
"expr": "label_join(avg by (name, namespace, condition, exported_namespace) (certmanager_certificate_ready_status{ } == 1), \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
|
@ -275,7 +304,7 @@
|
|||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "label_join(avg by (name, namespace, exported_namespace) (certmanager_certificate_expiration_timestamp_seconds) * 1000, \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")",
|
||||
"expr": "label_join(avg by (name, namespace, exported_namespace) (certmanager_certificate_expiration_timestamp_seconds{ }) * 1000, \"namespaced_name\", \"-\", \"namespace\", \"exported_namespace\", \"name\")",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
|
@ -392,9 +421,9 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (controller) (\n rate(certmanager_controller_sync_call_count[$__rate_interval])\n)",
|
||||
"expr": "sum by (controller) (\n rate(certmanager_controller_sync_call_count{ }[$__rate_interval ])\n)",
|
||||
"interval": "",
|
||||
"legendFormat": "{{controller}}",
|
||||
"legendFormat": "{ {controller } }",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
@ -493,9 +522,9 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (method, path, status) (\n rate(certmanager_http_acme_client_request_count[$__rate_interval])\n)",
|
||||
"expr": "sum by (method, path, status) (\n rate(certmanager_http_acme_client_request_count{ }[$__rate_interval ])\n)",
|
||||
"interval": "",
|
||||
"legendFormat": "{{method}} {{path}} {{status}}",
|
||||
"legendFormat": "{ {method } } { {path } } { {status } }",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
@ -594,9 +623,9 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_sum[$__rate_interval]))\n/\nsum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_count[$__rate_interval]))",
|
||||
"expr": "sum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_sum{ }[$__rate_interval ]))\n/\nsum by (method, path, status) (rate(certmanager_http_acme_client_request_duration_seconds_count{ }[$__rate_interval ]))",
|
||||
"interval": "",
|
||||
"legendFormat": "{{method}} {{path}} {{status}}",
|
||||
"legendFormat": "{ {method } } { {path } } { {status } }",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
@ -712,30 +741,30 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg by (pod) (rate(container_cpu_usage_seconds_total{container=\"cert-manager\"}[$__rate_interval]))",
|
||||
"expr": "avg by (pod) (rate(container_cpu_usage_seconds_total{container=\"cert-manager\" }[$__rate_interval ]))",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "CPU {{pod}}",
|
||||
"legendFormat": "CPU { {pod } }",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "avg by (pod) (kube_pod_container_resource_limits_cpu_cores{container=\"cert-manager\"})",
|
||||
"expr": "avg by (pod) (kube_pod_container_resource_limits_cpu_cores{container=\"cert-manager\" })",
|
||||
"format": "time_series",
|
||||
"hide": true,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Limit {{pod}}",
|
||||
"legendFormat": "Limit { {pod } }",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "avg by (pod) (kube_pod_container_resource_requests_cpu_cores{container=\"cert-manager\"})",
|
||||
"expr": "avg by (pod) (kube_pod_container_resource_requests_cpu_cores{container=\"cert-manager\" })",
|
||||
"format": "time_series",
|
||||
"hide": true,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Request {{pod}}",
|
||||
"legendFormat": "Request { {pod } }",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
|
@ -841,12 +870,12 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg by (pod) (\n rate(container_cpu_cfs_throttled_periods_total{container=\"cert-manager\"}[$__rate_interval])\n /\n rate(container_cpu_cfs_periods_total{container=\"cert-manager\"}[$__rate_interval])\n)",
|
||||
"expr": "avg by (pod) (\n rate(container_cpu_cfs_throttled_periods_total{container=\"cert-manager\" }[$__rate_interval ])\n /\n rate(container_cpu_cfs_periods_total{container=\"cert-manager\" }[$__rate_interval ])\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendFormat": "{ {pod } }",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
@ -962,28 +991,28 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg by (pod) (container_memory_usage_bytes{container=\"cert-manager\"})",
|
||||
"expr": "avg by (pod) (container_memory_usage_bytes{container=\"cert-manager\" })",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Memory {{pod}}",
|
||||
"legendFormat": "Memory { {pod } }",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "avg by (pod) (kube_pod_container_resource_limits_memory_bytes{container=\"cert-manager\"})",
|
||||
"expr": "avg by (pod) (kube_pod_container_resource_limits_memory_bytes{container=\"cert-manager\" })",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Limit {{pod}}",
|
||||
"legendFormat": "Limit { {pod } }",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "avg by (pod) (kube_pod_container_resource_requests_memory_bytes{container=\"cert-manager\"})",
|
||||
"expr": "avg by (pod) (kube_pod_container_resource_requests_memory_bytes{container=\"cert-manager\" })",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Request {{pod}}",
|
||||
"legendFormat": "Request { {pod } }",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
|
@ -1087,7 +1116,7 @@
|
|||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg(\n sum without (interface) (\n rate(container_network_receive_bytes_total{namespace=\"cert-manager\"}[$__rate_interval])\n )\n)",
|
||||
"expr": "avg(\n sum without (interface) (\n rate(container_network_receive_bytes_total{namespace=\"cert-manager\" }[$__rate_interval ])\n )\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
|
@ -1096,7 +1125,7 @@
|
|||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "avg(\n sum without (interface) (\n rate(container_network_transmit_bytes_total{namespace=\"cert-manager\"}[$__rate_interval])\n )\n)",
|
||||
"expr": "avg(\n sum without (interface) (\n rate(container_network_transmit_bytes_total{namespace=\"cert-manager\" }[$__rate_interval ])\n )\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
|
@ -1176,6 +1205,35 @@
|
|||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"definition": "",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(certmanager_certificate_ready_status{ }, cluster)",
|
||||
"refId": "Prometheus-cluster-Variable-Query"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
|
@ -97,7 +97,7 @@
|
|||
},
|
||||
{
|
||||
"name": "cert-manager",
|
||||
"source": "https://gitlab.com/uneeq-oss/cert-manager-mixin.git",
|
||||
"source": "https://github.com/imusmanmalik/cert-manager-mixin.git",
|
||||
"subdir": ""
|
||||
},
|
||||
{
|
||||
|
|
|
@ -7,7 +7,7 @@ title: cert-manager
|
|||
|
||||
|
||||
{{< panel style="danger" >}}
|
||||
Jsonnet source code is available at [gitlab.com/uneeq-oss/cert-manager-mixin.git](https://gitlab.com/uneeq-oss/cert-manager-mixin.git)
|
||||
Jsonnet source code is available at [github.com/imusmanmalik/cert-manager-mixin.git](https://github.com/imusmanmalik/cert-manager-mixin.git)
|
||||
{{< /panel >}}
|
||||
|
||||
## Alerts
|
||||
|
@ -25,8 +25,8 @@ alert: CertManagerAbsent
|
|||
annotations:
|
||||
description: New certificates will not be able to be minted, and existing ones can't
|
||||
be renewed until cert-manager is back.
|
||||
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent
|
||||
summary: Cert Manager has dissapeared from Prometheus service discovery.
|
||||
runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagerabsent
|
||||
summary: Cert Manager has disappeared from Prometheus service discovery.
|
||||
expr: absent(up{job="cert-manager"})
|
||||
for: 10m
|
||||
labels:
|
||||
|
@ -44,7 +44,7 @@ annotations:
|
|||
description: The domain that this cert covers will be unavailable after {{ $value
|
||||
| humanizeDuration }}. Clients using endpoints that this cert protects will start
|
||||
to fail in {{ $value | humanizeDuration }}.
|
||||
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon
|
||||
runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagercertexpirysoon
|
||||
summary: The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from expiry,
|
||||
it should have renewed over a week ago.
|
||||
expr: |
|
||||
|
@ -65,7 +65,7 @@ annotations:
|
|||
description: This certificate has not been ready to serve traffic for at least 10m.
|
||||
If the cert is being renewed or there is another valid cert, the ingress controller
|
||||
_may_ be able to serve that instead.
|
||||
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready
|
||||
runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagercertnotready
|
||||
summary: The cert `{{ $labels.name }}` is not ready to serve traffic.
|
||||
expr: |
|
||||
max by (name, exported_namespace, namespace, condition) (
|
||||
|
@ -84,7 +84,7 @@ annotations:
|
|||
dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager
|
||||
description: Depending on the rate limit, cert-manager may be unable to generate
|
||||
certificates for up to a week.
|
||||
runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits
|
||||
runbook_url: https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagerhittingratelimits
|
||||
summary: Cert manager hitting LetsEncrypt rate limits.
|
||||
expr: |
|
||||
sum by (host) (
|
||||
|
|
|
@ -97,7 +97,7 @@
|
|||
},
|
||||
{
|
||||
"name": "cert-manager",
|
||||
"source": "https://gitlab.com/uneeq-oss/cert-manager-mixin.git",
|
||||
"source": "https://github.com/imusmanmalik/cert-manager-mixin.git",
|
||||
"subdir": ""
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue