mirror of
https://github.com/monitoring-mixins/website.git
synced 2024-12-14 11:37:31 +00:00
Merge pull request #24 from paulfantom/promscale
This commit is contained in:
commit
49fbf2ed0a
24 changed files with 5308 additions and 126 deletions
2
.github/workflows/tests.yaml
vendored
2
.github/workflows/tests.yaml
vendored
|
@ -16,5 +16,5 @@ jobs:
|
|||
- uses: actions/checkout@v2
|
||||
- uses: actions/setup-go@v2
|
||||
with:
|
||||
go-version: '^1.14'
|
||||
go-version: '^1.17'
|
||||
- run: make generate
|
||||
|
|
|
@ -810,7 +810,7 @@
|
|||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
|
@ -819,6 +819,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -845,7 +846,7 @@
|
|||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -907,6 +908,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 11,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -933,7 +935,7 @@
|
|||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -995,6 +997,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 12,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1015,7 +1018,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1061,19 +1064,7 @@
|
|||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Querier",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
|
@ -1081,6 +1072,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"gridPos": { },
|
||||
"id": 13,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1101,7 +1093,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1157,6 +1149,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"gridPos": { },
|
||||
"id": 14,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1177,7 +1170,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1233,6 +1226,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 15,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1253,7 +1247,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1307,12 +1301,13 @@
|
|||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
"title": "Querier",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
|
@ -1321,6 +1316,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 16,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1347,7 +1343,7 @@
|
|||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1409,6 +1405,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 17,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1435,7 +1432,7 @@
|
|||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1497,6 +1494,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 18,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1517,7 +1515,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1563,19 +1561,7 @@
|
|||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Index Gateway",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
|
@ -1583,6 +1569,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"gridPos": { },
|
||||
"id": 19,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1603,7 +1590,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1659,6 +1646,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"gridPos": { },
|
||||
"id": 20,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1679,7 +1667,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1735,6 +1723,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 21,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1755,7 +1744,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1811,6 +1800,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 22,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1831,7 +1821,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1885,8 +1875,9 @@
|
|||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
"title": "Index Gateway",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
|
@ -2152,7 +2143,7 @@
|
|||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
|
@ -2161,6 +2152,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 26,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -2237,6 +2229,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 27,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -2317,19 +2310,7 @@
|
|||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Ruler",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
|
@ -2337,6 +2318,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 28,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -2425,6 +2407,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 29,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -2497,8 +2480,9 @@
|
|||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
"title": "Ruler",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
|
|
|
@ -548,7 +548,7 @@
|
|||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
|
@ -557,6 +557,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -631,6 +632,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -711,19 +713,7 @@
|
|||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Ingester",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
|
@ -731,6 +721,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -819,6 +810,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -885,19 +877,7 @@
|
|||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
|
@ -905,6 +885,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"gridPos": { },
|
||||
"id": 11,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -925,7 +906,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -981,6 +962,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"gridPos": { },
|
||||
"id": 12,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1001,7 +983,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1057,6 +1039,7 @@
|
|||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": { },
|
||||
"id": 13,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
|
@ -1077,7 +1060,7 @@
|
|||
"renderer": "flot",
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
|
@ -1131,8 +1114,9 @@
|
|||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "",
|
||||
"titleSize": "h6"
|
||||
"title": "Ingester",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
|
|
321
assets/promscale/alerts.yaml
Normal file
321
assets/promscale/alerts.yaml
Normal file
|
@ -0,0 +1,321 @@
|
|||
groups:
|
||||
- name: promscale-general
|
||||
rules:
|
||||
- alert: PromscaleDown
|
||||
annotations:
|
||||
description: No Promscale instance was found.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleDown.md
|
||||
summary: Promscale is down
|
||||
expr: absent(up{job=~".*promscale.*"})
|
||||
labels:
|
||||
severity: critical
|
||||
- name: promscale-ingest
|
||||
rules:
|
||||
- alert: PromscaleIngestHighErrorRate
|
||||
annotations:
|
||||
description: Promscale ingestion is having a {{ $value | humanizePercentage
|
||||
}} error rate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighErrorRate.md
|
||||
summary: High error rate in Promscale ingestion
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_requests_total{code=~"5.."}[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_requests_total[5m])
|
||||
)
|
||||
) > 0.05
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PromscaleIngestHighErrorRate
|
||||
annotations:
|
||||
description: Promscale ingestion is having a {{ $value | humanizePercentage
|
||||
}} error rate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighErrorRate.md
|
||||
summary: High error rate in Promscale ingestion
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_requests_total{code=~"5.."}[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_requests_total[5m])
|
||||
)
|
||||
) > 0.1
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: PromscaleIngestHighLatency
|
||||
annotations:
|
||||
description: Slowest 10% of ingestion batches took more than {{ $value }} seconds
|
||||
to ingest.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighLatency.md
|
||||
summary: Slow Promscale ingestion
|
||||
expr: |
|
||||
(
|
||||
histogram_quantile(
|
||||
0.90,
|
||||
sum by (job, instance, type, le) (
|
||||
rate(promscale_ingest_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 10
|
||||
and
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PromscaleIngestHighLatency
|
||||
annotations:
|
||||
description: Slowest 10% of ingestion batches took more than {{ $value }} seconds
|
||||
to ingest.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighLatency.md
|
||||
summary: Slow Promscale ingestion
|
||||
expr: |
|
||||
(
|
||||
histogram_quantile(
|
||||
0.90,
|
||||
sum by (job, instance, type, le) (
|
||||
rate(promscale_ingest_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 30
|
||||
and
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: promscale-query
|
||||
rules:
|
||||
- alert: PromscaleQueryHighErrorRate
|
||||
annotations:
|
||||
description: Evaluating queries via Promscale has {{ $value | humanizePercentage
|
||||
}} error rate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighErrorRate.md
|
||||
summary: High error rate in querying Promscale
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_requests_total{code=~"5.."}[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_requests_total[5m])
|
||||
)
|
||||
) > 0.05
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PromscaleQueryHighErrorRate
|
||||
annotations:
|
||||
description: Evaluating queries via Promscale has {{ $value | humanizePercentage
|
||||
}} error rate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighErrorRate.md
|
||||
summary: High error rate in querying Promscale
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_requests_total{code=~"5.."}[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_requests_total[5m])
|
||||
)
|
||||
) > 0.1
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: PromscaleQueryHighLatency
|
||||
annotations:
|
||||
description: Slowest 10% of the queries took more than {{ $value }} seconds
|
||||
to evaluate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighLatency.md
|
||||
summary: Slow Promscale querying
|
||||
expr: |
|
||||
(
|
||||
histogram_quantile(
|
||||
0.90,
|
||||
sum by (job, instance, type, le) (
|
||||
rate(promscale_query_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 5
|
||||
and
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_duration_seconds_bucket[5m])
|
||||
) > 0
|
||||
)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PromscaleQueryHighLatency
|
||||
annotations:
|
||||
description: Slowest 10% of the queries took more than {{ $value }} seconds to evaluate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighLatency.md
|
||||
summary: Slow Promscale querying
|
||||
expr: |
|
||||
(
|
||||
histogram_quantile(
|
||||
0.90,
|
||||
sum by (job, instance, type, le) (
|
||||
rate(promscale_query_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 10
|
||||
and
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_duration_seconds_bucket[5m])
|
||||
) > 0
|
||||
)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: promscale-cache
|
||||
rules:
|
||||
- alert: PromscaleCacheHighNumberOfEvictions
|
||||
annotations:
|
||||
description: Promscale {{ $labels.name }} is evicting at {{ $value }} entries
|
||||
a second.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCacheHighNumberOfEvictions.md
|
||||
summary: High cache eviction in Promscale
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, name, type) (
|
||||
rate(promscale_cache_evictions_total[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, name, type) (
|
||||
promscale_cache_capacity_elements
|
||||
)
|
||||
) > 0.2
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PromscaleCacheTooSmall
|
||||
annotations:
|
||||
description: Promscale {{ $labels.name }} has a hit ratio of {{ $value | humanizePercentage
|
||||
}}.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCacheTooSmall.md
|
||||
summary: Low cache hit ratio in Promscale
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, type, name) (
|
||||
rate(promscale_cache_query_hits_total[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, type, name) (
|
||||
rate(promscale_cache_queries_total[5m])
|
||||
)
|
||||
) < 0.9
|
||||
labels:
|
||||
severity: warning
|
||||
- name: promscale-database-connection
|
||||
rules:
|
||||
- alert: PromscaleStorageHighErrorRate
|
||||
annotations:
|
||||
description: Promscale connection with the database has an error rate of {{ $value
|
||||
| humanizePercentage }}.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageHighErrorRate.md
|
||||
summary: Promscale experiences a high error rate when connecting to the database
|
||||
expr: |
|
||||
(
|
||||
sum by (job) (
|
||||
# Error counter exists for query, query_row & exec, and not for send_batch.
|
||||
rate(promscale_database_request_errors_total{method=~"query.*|exec"}[5m])
|
||||
)
|
||||
/
|
||||
sum by (job) (
|
||||
rate(promscale_database_requests_total{method=~"query.*|exec"}[5m])
|
||||
)
|
||||
) > 0.05
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PromscaleStorageHighLatency
|
||||
annotations:
|
||||
description: Slowest 10% of database requests are taking more than {{ $value
|
||||
}} seconds to respond.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageHighLatency.md
|
||||
summary: Slow database response
|
||||
expr: |
|
||||
(
|
||||
histogram_quantile(0.9,
|
||||
sum by (le, job, type) (
|
||||
rate(promscale_database_requests_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 5
|
||||
and
|
||||
sum by (job, type) (
|
||||
rate(promscale_database_requests_duration_seconds_count[5m])
|
||||
) > 0
|
||||
)
|
||||
labels:
|
||||
severity: warning
|
||||
- name: promscale-database
|
||||
rules:
|
||||
- alert: PromscaleStorageUnhealthy
|
||||
annotations:
|
||||
description: Promscale database health checks have an error rate of {{ $value
|
||||
| humanizePercentage }}.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageUnhealthy.md
|
||||
summary: Promscale database is unhealthy
|
||||
expr: |
|
||||
(
|
||||
sum by (job) (
|
||||
rate(promscale_sql_database_health_check_errors_total[5m])
|
||||
)
|
||||
/
|
||||
sum by (job) (
|
||||
rate(promscale_sql_database_health_check_total[5m])
|
||||
)
|
||||
) > 0.05
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PromscaleMaintenanceJobRunningTooLong
|
||||
annotations:
|
||||
description: Promscale maintenance job started {{ $value }} seconds ago and is
|
||||
taking too long to complete.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobRunningTooLong.md
|
||||
summary: Promscale maintenance jobs taking too long to complete
|
||||
expr: |
|
||||
(
|
||||
(
|
||||
(
|
||||
time()
|
||||
-
|
||||
promscale_sql_database_worker_maintenance_job_start_timestamp_seconds
|
||||
)
|
||||
>
|
||||
30 * 60 * 2 # 30 mins (we launch maintenance jobs scheduled at 30 mins) * 60 (to seconds) * 2 (wait max for 2 complete scans before firing alert).
|
||||
)
|
||||
and
|
||||
promscale_sql_database_worker_maintenance_job_start_timestamp_seconds > 0
|
||||
)
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PromscaleMaintenanceJobFailures
|
||||
annotations:
|
||||
description: Promscale maintenance job failed to successfully execute.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobFailures.md
|
||||
summary: Promscale maintenance job failed
|
||||
expr: promscale_sql_database_worker_maintenance_job_failed == 1
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PromscaleCompressionLow
|
||||
annotations:
|
||||
description: High uncompressed data in Promscale, on average, {{ $value }} uncompressed
|
||||
chunks per metric.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCompressionLow.md
|
||||
summary: High uncompressed data
|
||||
expr: |
|
||||
(
|
||||
(
|
||||
(promscale_sql_database_chunks_count - promscale_sql_database_chunks_compressed_count) # Number of uncompressed chunks.
|
||||
/
|
||||
promscale_sql_database_metric_count
|
||||
) > 4 # Alert when the average number of uncompressed chunks per metric exceeds 4.
|
||||
and
|
||||
promscale_sql_database_compression_status == 1
|
||||
)
|
||||
labels:
|
||||
severity: warning
|
308
assets/promscale/dashboards/apm-dependencies.json
Normal file
308
assets/promscale/dashboards/apm-dependencies.json
Normal file
|
@ -0,0 +1,308 @@
|
|||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"description": "",
|
||||
"label": "TimescaleDB / PostgreSQL data source",
|
||||
"name": "DS_TIMESCALEDB",
|
||||
"pluginId": "postgres",
|
||||
"pluginName": "PostgreSQL",
|
||||
"type": "datasource"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": 11,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"promscale",
|
||||
"apm"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Menu",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Total exec time"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "ms"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Avg exec time"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "ms"
|
||||
},
|
||||
{
|
||||
"id": "decimals",
|
||||
"value": 2
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Source"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Show service overview",
|
||||
"url": "/d/YWfN6wL7z/?var-service=${__value.raw}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Target"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Show service overview",
|
||||
"url": "/d/YWfN6wL7z/?var-service=${__value.raw}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"footer": {
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "8.3.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n p.service_name as \"Source\",\n k.service_name as \"Target\",\n k.span_name as \"Operation\",\n count(*) as \"Calls\",\n sum(k.duration_ms) as \"Total exec time\",\n avg(k.duration_ms) as \"Avg exec time\"\nFROM ps_trace.span p\nINNER JOIN ps_trace.span k\nON (p.trace_id = k.trace_id\nAND p.span_id = k.parent_span_id\nAND p.service_name != k.service_name)\nWHERE p.start_time >= NOW() - INTERVAL '10 minutes'\nAND k.start_time >= NOW() - INTERVAL '10 minutes'\nGROUP BY 1, 2, 3\nORDER BY 5 DESC",
|
||||
"refId": "A",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"span_duration_ms"
|
||||
],
|
||||
"type": "column"
|
||||
}
|
||||
]
|
||||
],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Service Dependencies (last 10 minutes)",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"description": "This map shows all services sending traces and the interactions between them.\nEach arrow represents a service calling a specific operation in another service. The legend in the arrows includes the requests per second for that interaction. If you are downsampling your traces before sending them to Promscale, then the number of requests per second will not be accurate but you'll be able to see how it compares to other operations.",
|
||||
"gridPos": {
|
||||
"h": 19,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 2,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT \n service_name as id,\n service_name as title\nFROM ps_trace.span\nWHERE start_time >= NOW() - INTERVAL '10 minutes'\nGROUP BY service_name",
|
||||
"refId": "A",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"span_duration_ms"
|
||||
],
|
||||
"type": "column"
|
||||
}
|
||||
]
|
||||
],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"hide": false,
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n p.service_name || '->' || k.service_name || ':' || k.span_name as id,\n p.service_name as source,\n k.service_name as target,\n k.span_name as \"mainStat\",\n count(*) as \"secondaryStat\"\nFROM ps_trace.span p\nINNER JOIN ps_trace.span k\nON (p.trace_id = k.trace_id\nAND p.span_id = k.parent_span_id\nAND p.service_name != k.service_name)\nWHERE p.start_time >= NOW() - INTERVAL '10 minutes'\nAND k.start_time >= NOW() - INTERVAL '10 minutes'\nGROUP BY 1, 2, 3, 4",
|
||||
"refId": "B",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"span_duration_ms"
|
||||
],
|
||||
"type": "column"
|
||||
}
|
||||
]
|
||||
],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Service Map (last 10 minutes)",
|
||||
"type": "nodeGraph"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 34,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"promscale",
|
||||
"apm"
|
||||
],
|
||||
"templating": {
|
||||
"list": [ ]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"hidden": true
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "[3] Service Map",
|
||||
"uid": "K03UKvPnz",
|
||||
"version": 13,
|
||||
"weekStart": ""
|
||||
}
|
531
assets/promscale/dashboards/apm-home.json
Normal file
531
assets/promscale/dashboards/apm-home.json
Normal file
|
@ -0,0 +1,531 @@
|
|||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"description": "",
|
||||
"label": "TimescaleDB / PostgreSQL data source",
|
||||
"name": "DS_TIMESCALEDB",
|
||||
"pluginId": "postgres",
|
||||
"pluginName": "PostgreSQL",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"description": "",
|
||||
"label": "Promscale Jaeger Tracing data source",
|
||||
"name": "DS_PROMSCALE_JAEGER",
|
||||
"pluginId": "jaeger",
|
||||
"pluginName": "Jaeger",
|
||||
"type": "datasource"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": 10,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"promscale",
|
||||
"apm"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Menu",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
},
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "doc",
|
||||
"includeVars": false,
|
||||
"keepTime": false,
|
||||
"tags": [ ],
|
||||
"targetBlank": true,
|
||||
"title": "Documentation",
|
||||
"tooltip": "",
|
||||
"type": "link",
|
||||
"url": "https://docs.timescale.com/promscale/latest/"
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Requests"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "reqps"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Avg Duration"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "ms"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "p90 Duration"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "ms"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Error rate"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Service"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Show service overview",
|
||||
"url": "/d/YWfN6wL7z/?var-service=${__value.raw}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 24,
|
||||
"w": 13,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"footer": {
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": false,
|
||||
"displayName": "Avg Duration"
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "8.3.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"hide": false,
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n service_name AS \"Service\",\n COUNT(*)::numeric / (30 * 60) AS \"Requests\",\n AVG(duration_ms) AS \"Avg Duration\",\n ROUND(approx_percentile(0.90, percentile_agg(duration_ms))::numeric, 3) AS \"p90 Duration\",\n (count(*) filter (where status_code = 'STATUS_CODE_ERROR')::numeric / count(*)) AS \"Error rate\"\nFROM ps_trace.span s\nWHERE start_time > NOW() - INTERVAL '30m'\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nGROUP BY 1\nORDER BY 2",
|
||||
"refId": "A",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"span_duration_ms"
|
||||
],
|
||||
"type": "column"
|
||||
}
|
||||
]
|
||||
],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Services (Last 30 minutes)",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Duration"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "ms"
|
||||
},
|
||||
{
|
||||
"id": "decimals",
|
||||
"value": 2
|
||||
},
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 143
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Trace ID"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 282
|
||||
},
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "View trace details",
|
||||
"url": "/explore?left=%5B%22${__from}%22,%22${__to}%22,%22${DS_PROMSCALE_JAEGER}%22,%7B\"query\":\"${__value.raw}\"%7D%5D"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Time"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 182
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Trace ID"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 94
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Service"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Show service overview",
|
||||
"url": "/d/YWfN6wL7z/?var-service=${__value.raw}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 12,
|
||||
"w": 11,
|
||||
"x": 13,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"footer": {
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"sortBy": [ ]
|
||||
},
|
||||
"pluginVersion": "8.3.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n replace(trace_id::text, '-'::text, ''::text) as \"Trace ID\",\n service_name as \"Service\",\n span_name as \"Operation\",\n start_time as \"Time\",\n duration_ms as \"Duration\" \nFROM ps_trace.span\nWHERE start_time > NOW() - INTERVAL '30m'\nAND parent_span_id is null\nORDER BY duration_ms DESC\nLIMIT 50\n;",
|
||||
"refId": "A",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"span_duration_ms"
|
||||
],
|
||||
"type": "column"
|
||||
}
|
||||
]
|
||||
],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Slowest Requests (last 30 minutes)",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Service"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"title": "Show service overview",
|
||||
"url": "/d/YWfN6wL7z/?var-service=${__value.raw}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 12,
|
||||
"w": 11,
|
||||
"x": 13,
|
||||
"y": 12
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"footer": {
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"sortBy": [ ]
|
||||
},
|
||||
"pluginVersion": "8.3.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n status_message as \"Error\",\n service_name as \"Service\",\n count(*) as \"Occurrences\" \nFROM ps_trace.span\nWHERE start_time > NOW() - INTERVAL '30m'\nAND status_code = 'STATUS_CODE_ERROR'\nGROUP BY 1, 2\nORDER BY 3 DESC\n;",
|
||||
"refId": "A",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"span_duration_ms"
|
||||
],
|
||||
"type": "column"
|
||||
}
|
||||
]
|
||||
],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Most Common Errors (last 30 minutes)",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 34,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"promscale",
|
||||
"apm"
|
||||
],
|
||||
"templating": {
|
||||
"list": [ ]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"hidden": true
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "[1] Overview",
|
||||
"uid": "vBhEewLnk",
|
||||
"version": 35,
|
||||
"weekStart": ""
|
||||
}
|
|
@ -0,0 +1,202 @@
|
|||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"description": "",
|
||||
"label": "TimescaleDB / PostgreSQL data source",
|
||||
"name": "DS_TIMESCALEDB",
|
||||
"pluginId": "postgres",
|
||||
"pluginName": "PostgreSQL",
|
||||
"type": "datasource"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": 13,
|
||||
"iteration": 1647423383157,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"promscale",
|
||||
"apm"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Menu",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"description": "A.K.A. \"Who do I call?\"",
|
||||
"gridPos": {
|
||||
"h": 20,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "WITH RECURSIVE x AS\n(\n SELECT\n trace_id,\n span_id,\n parent_span_id,\n service_name,\n span_name\n FROM ps_trace.span\n WHERE start_time > NOW() - INTERVAL '10 minutes'\n AND service_name = '${service}'\n AND span_name = '${operation}'\n UNION ALL\n SELECT\n s.trace_id,\n s.span_id,\n s.parent_span_id,\n s.service_name,\n s.span_name\n FROM x\n INNER JOIN ps_trace.span s\n ON (x.trace_id = s.trace_id\n AND x.span_id = s.parent_span_id)\n AND s.start_time > NOW() - INTERVAL '10 minutes'\n)\nSELECT\n md5(service_name || '-' || span_name) as id,\n span_name as title,\n service_name as \"subTitle\",\n count(*) as \"mainStat\"\nFROM x\nGROUP BY service_name, span_name",
|
||||
"refId": "A",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"span_duration_ms"
|
||||
],
|
||||
"type": "column"
|
||||
}
|
||||
]
|
||||
],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"hide": false,
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "WITH RECURSIVE x AS\n(\n SELECT\n trace_id,\n span_id,\n parent_span_id,\n service_name,\n span_name,\n null::text as id,\n null::text as source,\n null::text as target\n FROM ps_trace.span\n WHERE start_time > NOW() - INTERVAL '10 minutes'\n AND service_name = '${service}'\n AND span_name = '${operation}'\n UNION ALL\n SELECT\n s.trace_id,\n s.span_id,\n s.parent_span_id,\n s.service_name,\n s.span_name,\n md5(s.service_name || '-' || s.span_name || '-' || x.service_name || '-' || x.span_name) as id,\n md5(x.service_name || '-' || x.span_name) as source,\n md5(s.service_name || '-' || s.span_name) as target\n FROM x\n INNER JOIN ps_trace.span s\n ON (x.trace_id = s.trace_id\n AND x.span_id = s.parent_span_id)\n AND s.start_time > NOW() - INTERVAL '10 minutes'\n)\nSELECT DISTINCT\n x.id,\n x.source,\n x.target \nFROM x\nWHERE id is not null",
|
||||
"refId": "B",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"span_duration_ms"
|
||||
],
|
||||
"type": "column"
|
||||
}
|
||||
]
|
||||
],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Map of Downstream Dependencies (last 10 minutes)",
|
||||
"transformations": [ ],
|
||||
"type": "nodeGraph"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 34,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"promscale",
|
||||
"apm"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": "ALL",
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"definition": "SELECT DISTINCT service_name FROM ps_trace.span WHERE start_time > NOW() - INTERVAL '10 minutes'\n",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Service",
|
||||
"multi": false,
|
||||
"name": "service",
|
||||
"options": [ ],
|
||||
"query": "SELECT DISTINCT service_name FROM ps_trace.span WHERE start_time > NOW() - INTERVAL '10 minutes'\n",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": "ALL",
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"definition": "SELECT DISTINCT span_name FROM ps_trace.span WHERE service_name = ${service:sqlstring} AND start_time > NOW() - INTERVAL '10 minutes'\n",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Operation",
|
||||
"multi": false,
|
||||
"name": "operation",
|
||||
"options": [ ],
|
||||
"query": "SELECT DISTINCT span_name FROM ps_trace.span WHERE service_name = ${service:sqlstring} AND start_time > NOW() - INTERVAL '10 minutes'\n",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"hidden": true
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "[4] Downstream Dependencies",
|
||||
"uid": "SDJmJvPnz",
|
||||
"version": 7,
|
||||
"weekStart": ""
|
||||
}
|
|
@ -0,0 +1,203 @@
|
|||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"description": "",
|
||||
"label": "TimescaleDB / PostgreSQL data source",
|
||||
"name": "DS_TIMESCALEDB",
|
||||
"pluginId": "postgres",
|
||||
"pluginName": "PostgreSQL",
|
||||
"type": "datasource"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Map of upstream service dependencies for a specific service and operation",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": 12,
|
||||
"iteration": 1647519937731,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"promscale",
|
||||
"apm"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Menu",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"description": "A.K.A. \"Who called me?\"",
|
||||
"gridPos": {
|
||||
"h": 26,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "WITH RECURSIVE x AS\n(\n SELECT\n trace_id,\n span_id,\n parent_span_id,\n service_name,\n span_name\n FROM ps_trace.span\n WHERE start_time > NOW() - INTERVAL '10 minutes'\n AND service_name = '${service}'\n AND span_name = '${operation}'\n UNION ALL\n SELECT\n s.trace_id,\n s.span_id,\n s.parent_span_id,\n s.service_name,\n s.span_name\n FROM x\n INNER JOIN ps_trace.span s\n ON (x.trace_id = s.trace_id\n AND x.parent_span_id = s.span_id)\n AND s.start_time > NOW() - INTERVAL '10 minutes'\n)\nSELECT\n md5(service_name || '-' || span_name) as id,\n span_name as title,\n service_name as \"subTitle\",\n count(*) as \"mainStat\"\nFROM x\nGROUP BY service_name, span_name",
|
||||
"refId": "A",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"span_duration_ms"
|
||||
],
|
||||
"type": "column"
|
||||
}
|
||||
]
|
||||
],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"hide": false,
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "WITH RECURSIVE x AS\n(\n SELECT\n trace_id,\n span_id,\n parent_span_id,\n service_name,\n span_name,\n null::text as id,\n null::text as target,\n null::text as source\n FROM ps_trace.span\n WHERE start_time > NOW() - INTERVAL '10 minutes'\n AND service_name = '${service}'\n AND span_name = '${operation}'\n UNION ALL\n SELECT\n s.trace_id,\n s.span_id,\n s.parent_span_id,\n s.service_name,\n s.span_name,\n md5(s.service_name || '-' || s.span_name || '-' || x.service_name || '-' || x.span_name) as id,\n md5(x.service_name || '-' || x.span_name) as target,\n md5(s.service_name || '-' || s.span_name) as source\n FROM x\n INNER JOIN ps_trace.span s\n ON (x.trace_id = s.trace_id\n AND x.parent_span_id = s.span_id)\n AND s.start_time > NOW() - INTERVAL '10 minutes'\n)\nSELECT DISTINCT\n x.id,\n x.target,\n x.source \nFROM x\nWHERE id is not null",
|
||||
"refId": "B",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"span_duration_ms"
|
||||
],
|
||||
"type": "column"
|
||||
}
|
||||
]
|
||||
],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Map of Upstream Dependencies (last 10 minutes)",
|
||||
"transformations": [ ],
|
||||
"type": "nodeGraph"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 34,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"promscale",
|
||||
"apm"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": "ALL",
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"definition": "SELECT DISTINCT service_name FROM ps_trace.span WHERE start_time > NOW() - INTERVAL '10 minutes'\n",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Service",
|
||||
"multi": false,
|
||||
"name": "service",
|
||||
"options": [ ],
|
||||
"query": "SELECT DISTINCT service_name FROM ps_trace.span WHERE start_time > NOW() - INTERVAL '10 minutes'\n",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": "ALL",
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"definition": "SELECT DISTINCT span_name FROM ps_trace.span WHERE service_name = ${service:sqlstring} AND start_time > NOW() - INTERVAL '10 minutes'\n",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Operation",
|
||||
"multi": false,
|
||||
"name": "operation",
|
||||
"options": [ ],
|
||||
"query": "SELECT DISTINCT span_name FROM ps_trace.span WHERE service_name = ${service:sqlstring} AND start_time > NOW() - INTERVAL '10 minutes'\n",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"hidden": true
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "[5] Upstream Dependencies",
|
||||
"uid": "o4PPTDPnz",
|
||||
"version": 14,
|
||||
"weekStart": ""
|
||||
}
|
751
assets/promscale/dashboards/apm-service-overview.json
Normal file
751
assets/promscale/dashboards/apm-service-overview.json
Normal file
|
@ -0,0 +1,751 @@
|
|||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"description": "",
|
||||
"label": "TimescaleDB / PostgreSQL data source",
|
||||
"name": "DS_TIMESCALEDB",
|
||||
"pluginId": "postgres",
|
||||
"pluginName": "PostgreSQL",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"description": "",
|
||||
"label": "Promscale Jaeger Tracing data source",
|
||||
"name": "DS_PROMSCALE_JAEGER",
|
||||
"pluginId": "jaeger",
|
||||
"pluginName": "Jaeger",
|
||||
"type": "datasource"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 2,
|
||||
"id": 9,
|
||||
"iteration": 1647523274899,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"promscale",
|
||||
"apm"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Menu",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"interval": "1s",
|
||||
"maxDataPoints": 300,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.3.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "time_series",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n time_bucket_gapfill('$__interval', start_time) AS time,\n coalesce(count(*)::numeric / (EXTRACT(epoch FROM '$__interval'::interval)), 0) AS \"Requests\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
|
||||
"refId": "A",
|
||||
"select": [ ],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Requests",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"interval": "1s",
|
||||
"maxDataPoints": 300,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.3.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "time_series",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n time_bucket_gapfill('$__interval', start_time) AS time,\n COALESCE(ROUND(approx_percentile(0.99, percentile_agg(duration_ms))::numeric, 3), 0) as \"p99\",\n COALESCE(ROUND(approx_percentile(0.90, percentile_agg(duration_ms))::numeric, 3), 0) as \"p90\",\n COALESCE(ROUND(approx_percentile(0.50, percentile_agg(duration_ms))::numeric, 3), 0) as \"p50\",\n COALESCE(AVG(duration_ms), 0) as \"Average\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
|
||||
"refId": "A",
|
||||
"select": [ ],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Duration",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"axisSoftMax": 1,
|
||||
"axisSoftMin": 0,
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "line"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"interval": "1s",
|
||||
"maxDataPoints": 300,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "time_series",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n time_bucket('$__interval', start_time) as time,\n coalesce(count(*) filter (where status_code = 'STATUS_CODE_ERROR')::numeric / count(*), 0) as \"Error rate\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
|
||||
"refId": "A",
|
||||
"select": [ ],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Error Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Requests"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "reqps"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Avg Duration"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "ms"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Error rate"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percentunit"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 12,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"footer": {
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"sortBy": [ ]
|
||||
},
|
||||
"pluginVersion": "8.3.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n span_name as \"Operation\",\n count(*)::numeric / (${__to:date:seconds} - ${__from:date:seconds}) AS \"Requests\",\n sum(duration_ms) / count(*)::numeric as \"Avg Duration\",\n coalesce((count(*) filter (where status_code = 'STATUS_CODE_ERROR')::numeric / count(*)), 0) as \"Error rate\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
|
||||
"refId": "A",
|
||||
"select": [ ],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Statistics by Operation",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Duration"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "ms"
|
||||
},
|
||||
{
|
||||
"id": "decimals",
|
||||
"value": 2
|
||||
},
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 143
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Trace ID"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 282
|
||||
},
|
||||
{
|
||||
"id": "links",
|
||||
"value": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "View trace details",
|
||||
"url": "/explore?left=%5B%22${__from}%22,%22${__to}%22,%22${DS_PROMSCALE_JAEGER}%22,%7B\"query\":\"${__value.raw}\"%7D%5D"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "start_time"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 182
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Trace ID"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.width",
|
||||
"value": 94
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 12,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 11
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"footer": {
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"sortBy": [ ]
|
||||
},
|
||||
"pluginVersion": "8.3.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n replace(trace_id::text, '-'::text, ''::text) as \"Trace ID\",\n span_name as \"Operation\",\n start_time as \"Time\",\n duration_ms as \"Duration\"\nFROM ps_trace.span\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nORDER BY duration_ms DESC\nLIMIT 50\n;",
|
||||
"refId": "A",
|
||||
"select": [ ],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Slowest Operation Executions",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 12,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 11
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"footer": {
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"sortBy": [ ]
|
||||
},
|
||||
"pluginVersion": "8.3.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"format": "table",
|
||||
"group": [ ],
|
||||
"metricColumn": "none",
|
||||
"rawQuery": true,
|
||||
"rawSql": "SELECT\n status_message as \"Error\",\n count(*) as \"Occurrences\"\nFROM ps_trace.span\nWHERE $__timeFilter(start_time) AND\nstatus_code = 'STATUS_CODE_ERROR' AND\nservice_name = '${service}'\nGROUP BY 1\nORDER BY 2 DESC\n;",
|
||||
"refId": "A",
|
||||
"select": [ ],
|
||||
"table": "event",
|
||||
"timeColumn": "\"time\"",
|
||||
"timeColumnType": "timestamp",
|
||||
"where": [
|
||||
{
|
||||
"name": "$__timeFilter",
|
||||
"params": [ ],
|
||||
"type": "macro"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"title": "Most Common Errors",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 34,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"promscale",
|
||||
"apm"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "postgres",
|
||||
"uid": "${DS_TIMESCALEDB}"
|
||||
},
|
||||
"definition": "SELECT \n distinct(service_name)\nFROM ps_trace.span\nWHERE $__timeFilter(start_time)\n",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Service",
|
||||
"multi": false,
|
||||
"name": "service",
|
||||
"options": [ ],
|
||||
"query": "SELECT \n distinct(service_name)\nFROM ps_trace.span\nWHERE $__timeFilter(start_time)\n",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": { },
|
||||
"timezone": "",
|
||||
"title": "[2] Service Details",
|
||||
"uid": "YWfN6wL7z",
|
||||
"version": 36,
|
||||
"weekStart": ""
|
||||
}
|
2449
assets/promscale/dashboards/promscale.json
Normal file
2449
assets/promscale/dashboards/promscale.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/promscale/rules.yaml
Normal file
1
assets/promscale/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
|
@ -463,7 +463,7 @@
|
|||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)",
|
||||
"query": "label_values(up{job=~\".*thanos-bucket-replicate.*\"}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
|
|
|
@ -1771,7 +1771,7 @@
|
|||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)",
|
||||
"query": "label_values(up{job=~\".*thanos-compact.*\"}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
|
|
|
@ -2142,29 +2142,6 @@
|
|||
"query": "5m,10m,30m,1h,6h,12h",
|
||||
"refresh": 2,
|
||||
"type": "interval"
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "all",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
|
@ -1069,7 +1069,7 @@
|
|||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)",
|
||||
"query": "label_values(up{job=~\".*thanos-query-frontend.*\"}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
|
|
|
@ -1830,7 +1830,7 @@
|
|||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)",
|
||||
"query": "label_values(up{job=~\".*thanos-query.*\"}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
|
|
|
@ -2174,7 +2174,7 @@
|
|||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)",
|
||||
"query": "label_values(up{job=~\".*thanos-receive.*\"}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
|
|
|
@ -1822,7 +1822,7 @@
|
|||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)",
|
||||
"query": "label_values(up{job=~\".*thanos-rule.*\"}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
|
|
|
@ -1459,7 +1459,7 @@
|
|||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)",
|
||||
"query": "label_values(up{job=~\".*thanos-sidecar.*\"}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
|
|
18
hack/go.mod
18
hack/go.mod
|
@ -1,9 +1,23 @@
|
|||
module github.com/monitoring-mixins/website/hack
|
||||
|
||||
go 1.14
|
||||
go 1.17
|
||||
|
||||
require (
|
||||
github.com/brancz/gojsontoyaml v0.0.0-20191212081931-bf2969bbd742
|
||||
github.com/google/go-jsonnet v0.16.1-0.20200703153429-aaf50f5b655f
|
||||
github.com/google/go-jsonnet v0.18.0
|
||||
github.com/jsonnet-bundler/jsonnet-bundler v0.4.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect
|
||||
github.com/fatih/color v1.10.0 // indirect
|
||||
github.com/ghodss/yaml v1.0.0 // indirect
|
||||
github.com/mattn/go-colorable v0.1.8 // indirect
|
||||
github.com/mattn/go-isatty v0.0.12 // indirect
|
||||
github.com/pkg/errors v0.8.0 // indirect
|
||||
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae // indirect
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6 // indirect
|
||||
gopkg.in/yaml.v2 v2.2.7 // indirect
|
||||
sigs.k8s.io/yaml v1.1.0 // indirect
|
||||
)
|
||||
|
|
15
hack/go.sum
15
hack/go.sum
|
@ -11,10 +11,14 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
|
|||
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
|
||||
github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s=
|
||||
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
|
||||
github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg=
|
||||
github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=
|
||||
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
|
||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||
github.com/google/go-jsonnet v0.16.1-0.20200703153429-aaf50f5b655f h1:mw4KoMG5/DXLPhpKXQRYTEIZFkFo0a1HU2R1HbeYpek=
|
||||
github.com/google/go-jsonnet v0.16.1-0.20200703153429-aaf50f5b655f/go.mod h1:sOcuej3UW1vpPTZOr8L7RQimqai1a57bt5j22LzGZCw=
|
||||
github.com/google/go-jsonnet v0.18.0 h1:/6pTy6g+Jh1a1I2UMoAODkqELFiVIdOxbNwv0DDzoOg=
|
||||
github.com/google/go-jsonnet v0.18.0/go.mod h1:C3fTzyVJDslXdiTqw/bTFk7vSGyCtH3MGRbDfvEwGd0=
|
||||
github.com/jsonnet-bundler/jsonnet-bundler v0.4.0 h1:4BKZ6LDqPc2wJDmaKnmYD/vDjUptJtnUpai802MibFc=
|
||||
github.com/jsonnet-bundler/jsonnet-bundler v0.4.0/go.mod h1:/by7P/OoohkI3q4CgSFqcoFsVY+IaNbzOVDknEsKDeU=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
|
@ -25,10 +29,14 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
|||
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
|
||||
github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA=
|
||||
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
|
||||
github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8=
|
||||
github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
|
||||
github.com/mattn/go-isatty v0.0.6/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
|
||||
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
|
||||
github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGeM=
|
||||
github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=
|
||||
github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
|
||||
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
|
||||
github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw=
|
||||
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
|
@ -43,6 +51,9 @@ golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5h
|
|||
golang.org/x/sys v0.0.0-20190310054646-10058d7d4faa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037 h1:YyJpGZS1sBuBCzLAR1VEpK193GlqGZbnPFnPV/5Rsb4=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae h1:/WDfKMnPU+m5M4xB+6x4kaepxRw6jWvR5iDRdvjHgy8=
|
||||
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
|
@ -52,3 +63,7 @@ gopkg.in/yaml.v2 v2.1.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
|||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
|
||||
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.7 h1:VUgggvou5XRW9mHwD/yXxIYSMtY0zoKQf/v226p2nyo=
|
||||
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs=
|
||||
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
|
||||
|
|
|
@ -114,6 +114,11 @@
|
|||
"name": "promtail",
|
||||
"source": "https://github.com/grafana/loki",
|
||||
"subdir": "production/promtail-mixin"
|
||||
},
|
||||
{
|
||||
"name": "promscale",
|
||||
"source": "https://github.com/timescale/promscale",
|
||||
"subdir": "docs/mixin"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
432
site/content/promscale/_index.md
Normal file
432
site/content/promscale/_index.md
Normal file
|
@ -0,0 +1,432 @@
|
|||
---
|
||||
title: promscale
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
|
||||
|
||||
{{< panel style="danger" >}}
|
||||
Jsonnet source code is available at [github.com/timescale/promscale](https://github.com/timescale/promscale/tree/master/docs/mixin)
|
||||
{{< /panel >}}
|
||||
|
||||
## Alerts
|
||||
|
||||
{{< panel style="warning" >}}
|
||||
A complete list of pregenerated alerts is available [here](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/alerts.yaml).
|
||||
{{< /panel >}}
|
||||
|
||||
### promscale-general
|
||||
|
||||
##### PromscaleDown
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleDown
|
||||
annotations:
|
||||
description: No Promscale instance was found.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleDown.md
|
||||
summary: Promscale is down
|
||||
expr: absent(up{job=~".*promscale.*"})
|
||||
labels:
|
||||
severity: critical
|
||||
{{< /code >}}
|
||||
|
||||
### promscale-ingest
|
||||
|
||||
##### PromscaleIngestHighErrorRate
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleIngestHighErrorRate
|
||||
annotations:
|
||||
description: Promscale ingestion is having a {{ $value | humanizePercentage }} error
|
||||
rate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighErrorRate.md
|
||||
summary: High error rate in Promscale ingestion
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_requests_total{code=~"5.."}[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_requests_total[5m])
|
||||
)
|
||||
) > 0.05
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleIngestHighErrorRate
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleIngestHighErrorRate
|
||||
annotations:
|
||||
description: Promscale ingestion is having a {{ $value | humanizePercentage }} error
|
||||
rate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighErrorRate.md
|
||||
summary: High error rate in Promscale ingestion
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_requests_total{code=~"5.."}[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_requests_total[5m])
|
||||
)
|
||||
) > 0.1
|
||||
labels:
|
||||
severity: critical
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleIngestHighLatency
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleIngestHighLatency
|
||||
annotations:
|
||||
description: Slowest 10% of ingestion batch took more than {{ $value }} seconds
|
||||
to ingest.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighLatency.md
|
||||
summary: Slow Promscale ingestion
|
||||
expr: |
|
||||
(
|
||||
histogram_quantile(
|
||||
0.90,
|
||||
sum by (job, instance, type, le) (
|
||||
rate(promscale_ingest_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 10
|
||||
and
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleIngestHighLatency
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleIngestHighLatency
|
||||
annotations:
|
||||
description: Slowest 10% of ingestion batch took more than {{ $value }} seconds
|
||||
to ingest.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighLatency.md
|
||||
summary: Slow Promscale ingestion
|
||||
expr: |
|
||||
(
|
||||
histogram_quantile(
|
||||
0.90,
|
||||
sum by (job, instance, type, le) (
|
||||
rate(promscale_ingest_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 30
|
||||
and
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_ingest_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
{{< /code >}}
|
||||
|
||||
### promscale-query
|
||||
|
||||
##### PromscaleQueryHighErrorRate
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleQueryHighErrorRate
|
||||
annotations:
|
||||
description: Evaluating queries via Promscale has {{ $value | humanizePercentage
|
||||
}} error rate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighErrorRate.md
|
||||
summary: High error rate in querying Promscale
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_requests_total{code=~"5.."}[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_requests_total[5m])
|
||||
)
|
||||
) > 0.05
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleQueryHighErrorRate
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleQueryHighErrorRate
|
||||
annotations:
|
||||
description: Evaluating queries via Promscale had {{ $value | humanizePercentage
|
||||
}} error rate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighErrorRate.md
|
||||
summary: High error rate in querying Promscale
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_requests_total{code=~"5.."}[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_requests_total[5m])
|
||||
)
|
||||
) > 0.1
|
||||
labels:
|
||||
severity: critical
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleQueryHighLatency
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleQueryHighLatency
|
||||
annotations:
|
||||
description: Slowest 10% of the queries took more than {{ $value }} seconds to evaluate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighLatency.md
|
||||
summary: Slow Promscale querying
|
||||
expr: |
|
||||
(
|
||||
histogram_quantile(
|
||||
0.90,
|
||||
sum by (job, instance, type, le) (
|
||||
rate(promscale_query_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 5
|
||||
and
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_duration_seconds_bucket[5m])
|
||||
) > 0
|
||||
)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleQueryHighLatency
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleQueryHighLatency
|
||||
annotations:
|
||||
description: Slowest 10% of the queries took {{ $value }} seconds to evaluate.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighLatency.md
|
||||
summary: Slow Promscale querying
|
||||
expr: |
|
||||
(
|
||||
histogram_quantile(
|
||||
0.90,
|
||||
sum by (job, instance, type, le) (
|
||||
rate(promscale_query_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 10
|
||||
and
|
||||
sum by (job, instance, type) (
|
||||
rate(promscale_query_duration_seconds_bucket[5m])
|
||||
) > 0
|
||||
)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
{{< /code >}}
|
||||
|
||||
### promscale-cache
|
||||
|
||||
##### PromscaleCacheHighNumberOfEvictions
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleCacheHighNumberOfEvictions
|
||||
annotations:
|
||||
description: Promscale {{ $labels.name }} is evicting at {{ $value }} entries a
|
||||
second.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCacheHighNumberOfEvictions.md
|
||||
summary: High cache eviction in Promscale
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, name, type) (
|
||||
rate(promscale_cache_evictions_total[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, name, type) (
|
||||
promscale_cache_capacity_elements
|
||||
)
|
||||
) > 0.2
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleCacheTooSmall
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleCacheTooSmall
|
||||
annotations:
|
||||
description: Promscale {{ $labels.name }} has a hit ratio of {{ $value | humanizePercentage
|
||||
}}.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCacheTooSmall.md
|
||||
summary: Low cache hit ratio in Promscale
|
||||
expr: |
|
||||
(
|
||||
sum by (job, instance, type, name) (
|
||||
rate(promscale_cache_query_hits_total[5m])
|
||||
)
|
||||
/
|
||||
sum by (job, instance, type, name) (
|
||||
rate(promscale_cache_queries_total[5m])
|
||||
)
|
||||
) < 0.9
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
### promscale-database-connection
|
||||
|
||||
##### PromscaleStorageHighErrorRate
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleStorageHighErrorRate
|
||||
annotations:
|
||||
description: Promscale connection with the database has an error rate of {{ $value |
|
||||
humanizePercentage }}.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageHighErrorRate.md
|
||||
summary: Promscale experiences a high error rate when connecting to the database
|
||||
expr: |
|
||||
(
|
||||
sum by (job) (
|
||||
# Error counter exists for query, query_row & exec, and not for send_batch.
|
||||
rate(promscale_database_request_errors_total{method=~"query.*|exec"}[5m])
|
||||
)
|
||||
/
|
||||
sum by (job) (
|
||||
rate(promscale_database_requests_total{method=~"query.*|exec"}[5m])
|
||||
)
|
||||
) > 0.05
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleStorageHighLatency
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleStorageHighLatency
|
||||
annotations:
|
||||
description: Slowest 10% of database requests are taking more than {{ $value }}
|
||||
seconds to respond.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageHighLatency.md
|
||||
summary: Slow database response
|
||||
expr: |
|
||||
(
|
||||
histogram_quantile(0.9,
|
||||
sum by (le, job, type) (
|
||||
rate(promscale_database_requests_duration_seconds_bucket[5m])
|
||||
)
|
||||
) > 5
|
||||
and
|
||||
sum by (job, type) (
|
||||
rate(promscale_database_requests_duration_seconds_count[5m])
|
||||
) > 0
|
||||
)
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
### promscale-database
|
||||
|
||||
##### PromscaleStorageUnhealthy
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleStorageUnhealthy
|
||||
annotations:
|
||||
description: Promscale database health checks have an error rate of {{ $value |
|
||||
humanizePercentage }}.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageUnhealthy.md
|
||||
summary: Promscale database is unhealthy
|
||||
expr: |
|
||||
(
|
||||
sum by (job) (
|
||||
rate(promscale_sql_database_health_check_errors_total[5m])
|
||||
)
|
||||
/
|
||||
sum by (job) (
|
||||
rate(promscale_sql_database_health_check_total[5m])
|
||||
)
|
||||
) > 0.05
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleMaintenanceJobRunningTooLong
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleMaintenanceJobRunningTooLong
|
||||
annotations:
|
||||
description: Promscale maintenance job has been running for {{ $value }} seconds,
|
||||
which is longer than expected.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobRunningTooLong.md
|
||||
summary: Promscale maintenance jobs taking too long to complete
|
||||
expr: |
|
||||
(
|
||||
(
|
||||
(
|
||||
time()
|
||||
-
|
||||
promscale_sql_database_worker_maintenance_job_start_timestamp_seconds
|
||||
)
|
||||
>
|
||||
30 * 60 * 2 # 30 mins (we launch maintenance jobs scheduled at 30 mins) * 60 (to seconds) * 2 (wait max for 2 complete scans before firing alert).
|
||||
)
|
||||
and
|
||||
promscale_sql_database_worker_maintenance_job_start_timestamp_seconds > 0
|
||||
)
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleMaintenanceJobFailures
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleMaintenanceJobFailures
|
||||
annotations:
|
||||
description: Promscale maintenance job failed to successfully execute.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobFailures.md
|
||||
summary: Promscale maintenance job failed
|
||||
expr: promscale_sql_database_worker_maintenance_job_failed == 1
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
##### PromscaleCompressionLow
|
||||
|
||||
{{< code lang="yaml" >}}
|
||||
alert: PromscaleCompressionLow
|
||||
annotations:
|
||||
description: High uncompressed data in Promscale, on average, {{ $value }} uncompressed
|
||||
chunks per metric.
|
||||
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCompressionLow.md
|
||||
summary: High uncompressed data
|
||||
expr: |
|
||||
(
|
||||
(
|
||||
(promscale_sql_database_chunks_count - promscale_sql_database_chunks_compressed_count) # Number of uncompressed chunks.
|
||||
/
|
||||
promscale_sql_database_metric_count
|
||||
) > 4 # If total number of average uncompressed chunk per metric is more than 4 chunks at maximum, we should alert.
|
||||
and
|
||||
promscale_sql_database_compression_status == 1
|
||||
)
|
||||
labels:
|
||||
severity: warning
|
||||
{{< /code >}}
|
||||
|
||||
## Dashboards
|
||||
The following dashboards are generated from mixins and hosted on GitHub:
|
||||
|
||||
|
||||
- [apm-dependencies](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/apm-dependencies.json)
|
||||
- [apm-home](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/apm-home.json)
|
||||
- [apm-service-dependencies-downstream](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/apm-service-dependencies-downstream.json)
|
||||
- [apm-service-dependencies-upstream](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/apm-service-dependencies-upstream.json)
|
||||
- [apm-service-overview](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/apm-service-overview.json)
|
||||
- [promscale](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/promscale.json)
|
|
@ -114,6 +114,11 @@
|
|||
"name": "promtail",
|
||||
"source": "https://github.com/grafana/loki",
|
||||
"subdir": "production/promtail-mixin"
|
||||
},
|
||||
{
|
||||
"name": "promscale",
|
||||
"source": "https://github.com/timescale/promscale",
|
||||
"subdir": "docs/mixin"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue