1
0
Fork 0
mirror of https://github.com/monitoring-mixins/website.git synced 2024-12-14 11:37:31 +00:00

Merge pull request #24 from paulfantom/promscale

This commit is contained in:
Paweł Krupa 2022-05-02 11:04:01 +02:00 committed by GitHub
commit 49fbf2ed0a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 5308 additions and 126 deletions

View file

@ -16,5 +16,5 @@ jobs:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- uses: actions/setup-go@v2 - uses: actions/setup-go@v2
with: with:
go-version: '^1.14' go-version: '^1.17'
- run: make generate - run: make generate

View file

@ -810,7 +810,7 @@
}, },
{ {
"collapse": false, "collapse": false,
"height": "250px", "collapsed": false,
"panels": [ "panels": [
{ {
"aliasColors": { }, "aliasColors": { },
@ -819,6 +819,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 10, "id": 10,
"legend": { "legend": {
"avg": false, "avg": false,
@ -845,7 +846,7 @@
} }
], ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -907,6 +908,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 11, "id": 11,
"legend": { "legend": {
"avg": false, "avg": false,
@ -933,7 +935,7 @@
} }
], ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -995,6 +997,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 12, "id": 12,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1015,7 +1018,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1061,19 +1064,7 @@
"show": false "show": false
} }
] ]
} },
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Querier",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{ {
"aliasColors": { }, "aliasColors": { },
"bars": false, "bars": false,
@ -1081,6 +1072,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"gridPos": { },
"id": 13, "id": 13,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1101,7 +1093,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": true, "stack": true,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1157,6 +1149,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"gridPos": { },
"id": 14, "id": 14,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1177,7 +1170,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": true, "stack": true,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1233,6 +1226,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 15, "id": 15,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1253,7 +1247,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1307,12 +1301,13 @@
"repeatIteration": null, "repeatIteration": null,
"repeatRowId": null, "repeatRowId": null,
"showTitle": true, "showTitle": true,
"title": "", "title": "Querier",
"titleSize": "h6" "titleSize": "h6",
"type": "row"
}, },
{ {
"collapse": false, "collapse": false,
"height": "250px", "collapsed": false,
"panels": [ "panels": [
{ {
"aliasColors": { }, "aliasColors": { },
@ -1321,6 +1316,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 16, "id": 16,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1347,7 +1343,7 @@
} }
], ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1409,6 +1405,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 17, "id": 17,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1435,7 +1432,7 @@
} }
], ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1497,6 +1494,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 18, "id": 18,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1517,7 +1515,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1563,19 +1561,7 @@
"show": false "show": false
} }
] ]
} },
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Index Gateway",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{ {
"aliasColors": { }, "aliasColors": { },
"bars": false, "bars": false,
@ -1583,6 +1569,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"gridPos": { },
"id": 19, "id": 19,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1603,7 +1590,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 3, "span": 6,
"stack": true, "stack": true,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1659,6 +1646,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"gridPos": { },
"id": 20, "id": 20,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1679,7 +1667,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 3, "span": 6,
"stack": true, "stack": true,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1735,6 +1723,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 21, "id": 21,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1755,7 +1744,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 3, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1811,6 +1800,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 22, "id": 22,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1831,7 +1821,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 3, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1885,8 +1875,9 @@
"repeatIteration": null, "repeatIteration": null,
"repeatRowId": null, "repeatRowId": null,
"showTitle": true, "showTitle": true,
"title": "", "title": "Index Gateway",
"titleSize": "h6" "titleSize": "h6",
"type": "row"
}, },
{ {
"collapse": false, "collapse": false,
@ -2152,7 +2143,7 @@
}, },
{ {
"collapse": false, "collapse": false,
"height": "250px", "collapsed": false,
"panels": [ "panels": [
{ {
"aliasColors": { }, "aliasColors": { },
@ -2161,6 +2152,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 26, "id": 26,
"legend": { "legend": {
"avg": false, "avg": false,
@ -2237,6 +2229,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 27, "id": 27,
"legend": { "legend": {
"avg": false, "avg": false,
@ -2317,19 +2310,7 @@
"show": false "show": false
} }
] ]
} },
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Ruler",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{ {
"aliasColors": { }, "aliasColors": { },
"bars": false, "bars": false,
@ -2337,6 +2318,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 28, "id": 28,
"legend": { "legend": {
"avg": false, "avg": false,
@ -2425,6 +2407,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 29, "id": 29,
"legend": { "legend": {
"avg": false, "avg": false,
@ -2497,8 +2480,9 @@
"repeatIteration": null, "repeatIteration": null,
"repeatRowId": null, "repeatRowId": null,
"showTitle": true, "showTitle": true,
"title": "", "title": "Ruler",
"titleSize": "h6" "titleSize": "h6",
"type": "row"
} }
], ],
"schemaVersion": 14, "schemaVersion": 14,

View file

@ -548,7 +548,7 @@
}, },
{ {
"collapse": false, "collapse": false,
"height": "250px", "collapsed": false,
"panels": [ "panels": [
{ {
"aliasColors": { }, "aliasColors": { },
@ -557,6 +557,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 7, "id": 7,
"legend": { "legend": {
"avg": false, "avg": false,
@ -631,6 +632,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 8, "id": 8,
"legend": { "legend": {
"avg": false, "avg": false,
@ -711,19 +713,7 @@
"show": false "show": false
} }
] ]
} },
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Ingester",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{ {
"aliasColors": { }, "aliasColors": { },
"bars": false, "bars": false,
@ -731,6 +721,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 9, "id": 9,
"legend": { "legend": {
"avg": false, "avg": false,
@ -819,6 +810,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 10, "id": 10,
"legend": { "legend": {
"avg": false, "avg": false,
@ -885,19 +877,7 @@
"show": false "show": false
} }
] ]
} },
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "",
"titleSize": "h6"
},
{
"collapse": false,
"height": "250px",
"panels": [
{ {
"aliasColors": { }, "aliasColors": { },
"bars": false, "bars": false,
@ -905,6 +885,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"gridPos": { },
"id": 11, "id": 11,
"legend": { "legend": {
"avg": false, "avg": false,
@ -925,7 +906,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": true, "stack": true,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -981,6 +962,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 10, "fill": 10,
"gridPos": { },
"id": 12, "id": 12,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1001,7 +983,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": true, "stack": true,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1057,6 +1039,7 @@
"dashes": false, "dashes": false,
"datasource": "$datasource", "datasource": "$datasource",
"fill": 1, "fill": 1,
"gridPos": { },
"id": 13, "id": 13,
"legend": { "legend": {
"avg": false, "avg": false,
@ -1077,7 +1060,7 @@
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [ ], "seriesOverrides": [ ],
"spaceLength": 10, "spaceLength": 10,
"span": 4, "span": 6,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
@ -1131,8 +1114,9 @@
"repeatIteration": null, "repeatIteration": null,
"repeatRowId": null, "repeatRowId": null,
"showTitle": true, "showTitle": true,
"title": "", "title": "Ingester",
"titleSize": "h6" "titleSize": "h6",
"type": "row"
} }
], ],
"schemaVersion": 14, "schemaVersion": 14,

View file

@ -0,0 +1,321 @@
groups:
# General availability of Promscale.
- name: promscale-general
  rules:
  # absent() returns a value only when the selector matches no series at all,
  # so this fires when no `up` series exists for any job whose name contains
  # "promscale".
  - alert: PromscaleDown
    expr: absent(up{job=~".*promscale.*"})
    labels:
      severity: critical
    annotations:
      summary: Promscale is down
      description: No Promscale instance was found.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleDown.md
# Ingestion-path alerts: share of 5xx responses and p90 batch latency, each
# defined twice under the same alert name with a warning and a critical
# threshold.
- name: promscale-ingest
  rules:
  # Warning tier: 5xx ingest requests exceed 5% of all ingest requests,
  # compared as 5m rates per (job, instance, type).
  - alert: PromscaleIngestHighErrorRate
    annotations:
      description: Promscale ingestion is having a {{ $value | humanizePercentage }} error rate.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighErrorRate.md
      summary: High error rate in Promscale ingestion
    expr: |
      (
        sum by (job, instance, type) (
          rate(promscale_ingest_requests_total{code=~"5.."}[5m])
        )
      /
        sum by (job, instance, type) (
          rate(promscale_ingest_requests_total[5m])
        )
      ) > 0.05
    labels:
      severity: warning
  # Critical tier: same ratio, threshold raised to 10%.
  - alert: PromscaleIngestHighErrorRate
    annotations:
      description: Promscale ingestion is having a {{ $value | humanizePercentage }} error rate.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighErrorRate.md
      summary: High error rate in Promscale ingestion
    expr: |
      (
        sum by (job, instance, type) (
          rate(promscale_ingest_requests_total[5m])
        )
      ) > 0
      and
      (
        sum by (job, instance, type) (
          rate(promscale_ingest_requests_total{code=~"5.."}[5m])
        )
      /
        sum by (job, instance, type) (
          rate(promscale_ingest_requests_total[5m])
        )
      ) > 0.1
    labels:
      severity: critical
  # Warning tier: p90 ingest duration above 10s for 5 minutes.
  # The right-hand side of `and` is a traffic guard: the `> 0` comparison
  # belongs to that guard (as in the query and storage latency rules), so the
  # alert is only considered for series that actually observed ingest activity.
  - alert: PromscaleIngestHighLatency
    annotations:
      description: Slowest 10% of ingestion batch took more than {{ $value }} seconds to ingest.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighLatency.md
      summary: Slow Promscale ingestion
    expr: |
      (
        histogram_quantile(
          0.90,
          sum by (job, instance, type, le) (
            rate(promscale_ingest_duration_seconds_bucket[5m])
          )
        ) > 10
      and
        sum by (job, instance, type) (
          rate(promscale_ingest_duration_seconds_bucket[5m])
        ) > 0
      )
    for: 5m
    labels:
      severity: warning
  # Critical tier: same expression, p90 threshold raised to 30s.
  - alert: PromscaleIngestHighLatency
    annotations:
      description: Slowest 10% of ingestion batch took more than {{ $value }} seconds to ingest.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighLatency.md
      summary: Slow Promscale ingestion
    expr: |
      (
        histogram_quantile(
          0.90,
          sum by (job, instance, type, le) (
            rate(promscale_ingest_duration_seconds_bucket[5m])
          )
        ) > 30
      and
        sum by (job, instance, type) (
          rate(promscale_ingest_duration_seconds_bucket[5m])
        ) > 0
      )
    for: 5m
    labels:
      severity: critical
# Query-path alerts: share of 5xx responses and p90 query latency. Every alert
# name appears twice, once per severity tier.
- name: promscale-query
  rules:
  # Warning: 5xx query requests above 5% of all query requests (5m rates,
  # grouped by job, instance and type).
  - alert: PromscaleQueryHighErrorRate
    expr: |
      (
        sum by (job, instance, type) (
          rate(promscale_query_requests_total{code=~"5.."}[5m])
        )
      /
        sum by (job, instance, type) (
          rate(promscale_query_requests_total[5m])
        )
      ) > 0.05
    labels:
      severity: warning
    annotations:
      summary: High error rate in querying Promscale
      description: Evaluating queries via Promscale has {{ $value | humanizePercentage }} error rate.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighErrorRate.md
  # Critical: same ratio above 10%.
  - alert: PromscaleQueryHighErrorRate
    expr: |
      (
        sum by (job, instance, type) (
          rate(promscale_query_requests_total{code=~"5.."}[5m])
        )
      /
        sum by (job, instance, type) (
          rate(promscale_query_requests_total[5m])
        )
      ) > 0.1
    labels:
      severity: critical
    annotations:
      summary: High error rate in querying Promscale
      description: Evaluating queries via Promscale had {{ $value | humanizePercentage }} error rate.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighErrorRate.md
  # Warning: p90 query duration above 5s, sustained for 5 minutes, and only
  # where the duration histogram shows a non-zero rate.
  - alert: PromscaleQueryHighLatency
    expr: |
      (
        histogram_quantile(
          0.90,
          sum by (job, instance, type, le) (
            rate(promscale_query_duration_seconds_bucket[5m])
          )
        ) > 5
      and
        sum by (job, instance, type) (
          rate(promscale_query_duration_seconds_bucket[5m])
        ) > 0
      )
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: Slow Promscale querying
      description: Slowest 10% of the queries took more than {{ $value }} seconds to evaluate.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighLatency.md
  # Critical: same expression with the p90 threshold at 10s.
  - alert: PromscaleQueryHighLatency
    expr: |
      (
        histogram_quantile(
          0.90,
          sum by (job, instance, type, le) (
            rate(promscale_query_duration_seconds_bucket[5m])
          )
        ) > 10
      and
        sum by (job, instance, type) (
          rate(promscale_query_duration_seconds_bucket[5m])
        ) > 0
      )
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: Slow Promscale querying
      description: Slowest 10% of the queries took {{ $value }} seconds to evaluate.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighLatency.md
# Cache alerts: eviction pressure relative to capacity, and hit ratio.
- name: promscale-cache
  rules:
  # The expression divides the 5m eviction rate by the cache capacity, so
  # $value is a fraction of capacity evicted per second (threshold 0.2), not
  # an absolute entry count. The description reports it as a percentage.
  - alert: PromscaleCacheHighNumberOfEvictions
    annotations:
      description: Promscale {{ $labels.name }} is evicting {{ $value | humanizePercentage }} of its capacity per second.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCacheHighNumberOfEvictions.md
      summary: High cache eviction in Promscale
    expr: |
      (
        sum by (job, instance, name, type) (
          rate(promscale_cache_evictions_total[5m])
        )
      /
        sum by (job, instance, name, type) (
          promscale_cache_capacity_elements
        )
      ) > 0.2
    labels:
      severity: warning
  # Fires when cache hits fall below 90% of cache queries (5m rates). The
  # summary describes the hit ratio, which is what this rule measures.
  - alert: PromscaleCacheTooSmall
    annotations:
      description: Promscale {{ $labels.name }} has a hit ratio of {{ $value | humanizePercentage }}.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCacheTooSmall.md
      summary: Low cache hit ratio in Promscale
    expr: |
      (
        sum by (job, instance, type, name) (
          rate(promscale_cache_query_hits_total[5m])
        )
      /
        sum by (job, instance, type, name) (
          rate(promscale_cache_queries_total[5m])
        )
      ) < 0.9
    labels:
      severity: warning
# Alerts on the Promscale-to-database request path, aggregated per job:
# request error ratio and p90 request latency.
- name: promscale-database-connection
  rules:
  # Errors above 5% of requests, restricted to methods matching
  # "query.*|exec" on both sides of the division (see the comment embedded in
  # the expression for why).
  - alert: PromscaleStorageHighErrorRate
    expr: |
      (
        sum by (job) (
          # Error counter exists for query, query_row & exec, and not for send_batch.
          rate(promscale_database_request_errors_total{method=~"query.*|exec"}[5m])
        )
      /
        sum by (job) (
          rate(promscale_database_requests_total{method=~"query.*|exec"}[5m])
        )
      ) > 0.05
    labels:
      severity: warning
    annotations:
      summary: Promscale experiences a high error rate when connecting to the database
      description: Promscale connection with the database has an error of {{ $value | humanizePercentage }}.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageHighErrorRate.md
  # p90 database request duration above 5s, only where the request count
  # rate is non-zero.
  - alert: PromscaleStorageHighLatency
    expr: |
      (
        histogram_quantile(0.9,
          sum by (le, job, type) (
            rate(promscale_database_requests_duration_seconds_bucket[5m])
          )
        ) > 5
      and
        sum by (job, type) (
          rate(promscale_database_requests_duration_seconds_count[5m])
        ) > 0
      )
    labels:
      severity: warning
    annotations:
      summary: Slow database response
      description: Slowest 10% of database requests are taking more than {{ $value }} seconds to respond.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageHighLatency.md
# Database-side alerts: health-check failures, maintenance job duration and
# failures, and backlog of uncompressed chunks.
- name: promscale-database
  rules:
  # Health-check errors above 5% of health checks (5m rates, per job). The
  # description names the health check, which is what the expression measures.
  - alert: PromscaleStorageUnhealthy
    annotations:
      description: Promscale database health checks are failing at a rate of {{ $value | humanizePercentage }}.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageUnhealthy.md
      summary: Promscale database is unhealthy
    expr: |
      (
        sum by (job) (
          rate(promscale_sql_database_health_check_errors_total[5m])
        )
      /
        sum by (job) (
          rate(promscale_sql_database_health_check_total[5m])
        )
      ) > 0.05
    labels:
      severity: warning
  # $value is time() minus the job start timestamp, i.e. seconds elapsed since
  # the maintenance job started; the alert fires once that exceeds 3600s and
  # the start timestamp is non-zero.
  - alert: PromscaleMaintenanceJobRunningTooLong
    annotations:
      description: Promscale maintenance job has been running for {{ $value }} seconds.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobRunningTooLong.md
      summary: Promscale maintenance jobs taking too long to complete
    expr: |
      (
        (
          (
            time()
            -
            promscale_sql_database_worker_maintenance_job_start_timestamp_seconds
          )
          >
          30 * 60 * 2 # 30 mins (we launch maintenance jobs scheduled at 30 mins) * 60 (to seconds) * 2 (wait max for 2 complete scans before firing alert).
        )
        and
        promscale_sql_database_worker_maintenance_job_start_timestamp_seconds > 0
      )
    labels:
      severity: warning
  # Fires while the failure gauge equals 1.
  - alert: PromscaleMaintenanceJobFailures
    annotations:
      description: Promscale maintenance job failed to successfully execute.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobFailures.md
      summary: Promscale maintenance job failed
    expr: promscale_sql_database_worker_maintenance_job_failed == 1
    labels:
      severity: warning
  # Average uncompressed chunks per metric above 4, evaluated only where
  # promscale_sql_database_compression_status equals 1.
  - alert: PromscaleCompressionLow
    annotations:
      description: High uncompressed data in Promscale, on average, {{ $value }} uncompressed chunks per metric.
      runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCompressionLow.md
      summary: High uncompressed data
    expr: |
      (
        (
          (promscale_sql_database_chunks_count - promscale_sql_database_chunks_compressed_count) # Number of uncompressed chunks.
          /
          promscale_sql_database_metric_count
        ) > 4 # If total number of average uncompressed chunk per metric is more than 4 chunks at maximum, we should alert.
        and
        promscale_sql_database_compression_status == 1
      )
    labels:
      severity: warning

View file

@ -0,0 +1,308 @@
{
"__inputs": [
{
"description": "",
"label": "TimescaleDB / PostgreSQL data source",
"name": "DS_TIMESCALEDB",
"pluginId": "postgres",
"pluginName": "PostgreSQL",
"type": "datasource"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [ ],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 11,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"promscale",
"apm"
],
"targetBlank": false,
"title": "Menu",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"displayMode": "auto"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Total exec time"
},
"properties": [
{
"id": "unit",
"value": "ms"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Avg exec time"
},
"properties": [
{
"id": "unit",
"value": "ms"
},
{
"id": "decimals",
"value": 2
}
]
},
{
"matcher": {
"id": "byName",
"options": "Source"
},
"properties": [
{
"id": "links",
"value": [
{
"title": "Show service overview",
"url": "/d/YWfN6wL7z/?var-service=${__value.raw}"
}
]
}
]
},
{
"matcher": {
"id": "byName",
"options": "Target"
},
"properties": [
{
"id": "links",
"value": [
{
"title": "Show service overview",
"url": "/d/YWfN6wL7z/?var-service=${__value.raw}"
}
]
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n p.service_name as \"Source\",\n k.service_name as \"Target\",\n k.span_name as \"Operation\",\n count(*) as \"Calls\",\n sum(k.duration_ms) as \"Total exec time\",\n avg(k.duration_ms) as \"Avg exec time\"\nFROM ps_trace.span p\nINNER JOIN ps_trace.span k\nON (p.trace_id = k.trace_id\nAND p.span_id = k.parent_span_id\nAND p.service_name != k.service_name)\nWHERE p.start_time >= NOW() - INTERVAL '10 minutes'\nAND k.start_time >= NOW() - INTERVAL '10 minutes'\nGROUP BY 1, 2, 3\nORDER BY 5 DESC",
"refId": "A",
"select": [
[
{
"params": [
"span_duration_ms"
],
"type": "column"
}
]
],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Service Dependencies (last 10 minutes)",
"type": "table"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"description": "This map shows all services sending traces and the interactions between them.\nEach arrow represents a service calling a specific operation in another service. The legend in the arrows includes the requests per second for that interaction. If you are downsampling your traces before sending them to Promscale, then the number of requests per second will not be accurate but you'll be able to see how it compares to other operations.",
"gridPos": {
"h": 19,
"w": 24,
"x": 0,
"y": 8
},
"id": 2,
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT \n service_name as id,\n service_name as title\nFROM ps_trace.span\nWHERE start_time >= NOW() - INTERVAL '10 minutes'\nGROUP BY service_name",
"refId": "A",
"select": [
[
{
"params": [
"span_duration_ms"
],
"type": "column"
}
]
],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n p.service_name || '->' || k.service_name || ':' || k.span_name as id,\n p.service_name as source,\n k.service_name as target,\n k.span_name as \"mainStat\",\n count(*) as \"secondaryStat\"\nFROM ps_trace.span p\nINNER JOIN ps_trace.span k\nON (p.trace_id = k.trace_id\nAND p.span_id = k.parent_span_id\nAND p.service_name != k.service_name)\nWHERE p.start_time >= NOW() - INTERVAL '10 minutes'\nAND k.start_time >= NOW() - INTERVAL '10 minutes'\nGROUP BY 1, 2, 3, 4",
"refId": "B",
"select": [
[
{
"params": [
"span_duration_ms"
],
"type": "column"
}
]
],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Service Map (last 10 minutes)",
"type": "nodeGraph"
}
],
"schemaVersion": 34,
"style": "dark",
"tags": [
"promscale",
"apm"
],
"templating": {
"list": [ ]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"hidden": true
},
"timezone": "",
"title": "[3] Service Map",
"uid": "K03UKvPnz",
"version": 13,
"weekStart": ""
}

View file

@ -0,0 +1,531 @@
{
"__inputs": [
{
"description": "",
"label": "TimescaleDB / PostgreSQL data source",
"name": "DS_TIMESCALEDB",
"pluginId": "postgres",
"pluginName": "PostgreSQL",
"type": "datasource"
},
{
"description": "",
"label": "Promscale Jaeger Tracing data source",
"name": "DS_PROMSCALE_JAEGER",
"pluginId": "jaeger",
"pluginName": "Jaeger",
"type": "datasource"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [ ],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 10,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"promscale",
"apm"
],
"targetBlank": false,
"title": "Menu",
"tooltip": "",
"type": "dashboards",
"url": ""
},
{
"asDropdown": false,
"icon": "doc",
"includeVars": false,
"keepTime": false,
"tags": [ ],
"targetBlank": true,
"title": "Documentation",
"tooltip": "",
"type": "link",
"url": "https://docs.timescale.com/promscale/latest/"
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"displayMode": "auto"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Requests"
},
"properties": [
{
"id": "unit",
"value": "reqps"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Avg Duration"
},
"properties": [
{
"id": "unit",
"value": "ms"
}
]
},
{
"matcher": {
"id": "byName",
"options": "p90 Duration"
},
"properties": [
{
"id": "unit",
"value": "ms"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Error rate"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Service"
},
"properties": [
{
"id": "links",
"value": [
{
"title": "Show service overview",
"url": "/d/YWfN6wL7z/?var-service=${__value.raw}"
}
]
}
]
}
]
},
"gridPos": {
"h": 24,
"w": 13,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": false,
"displayName": "Avg Duration"
}
]
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n service_name AS \"Service\",\n COUNT(*)::numeric / (30 * 60) AS \"Requests\",\n AVG(duration_ms) AS \"Avg Duration\",\n ROUND(approx_percentile(0.90, percentile_agg(duration_ms))::numeric, 3) AS \"p90 Duration\",\n (count(*) filter (where status_code = 'STATUS_CODE_ERROR')::numeric / count(*)) AS \"Error rate\"\nFROM ps_trace.span s\nWHERE start_time > NOW() - INTERVAL '30m'\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nGROUP BY 1\nORDER BY 2",
"refId": "A",
"select": [
[
{
"params": [
"span_duration_ms"
],
"type": "column"
}
]
],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Services (Last 30 minutes)",
"type": "table"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"displayMode": "auto"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Duration"
},
"properties": [
{
"id": "unit",
"value": "ms"
},
{
"id": "decimals",
"value": 2
},
{
"id": "custom.width",
"value": 143
}
]
},
{
"matcher": {
"id": "byName",
"options": "Trace ID"
},
"properties": [
{
"id": "custom.width",
"value": 282
},
{
"id": "links",
"value": [
{
"targetBlank": true,
"title": "View trace details",
"url": "/explore?left=%5B%22${__from}%22,%22${__to}%22,%22${DS_PROMSCALE_JAEGER}%22,%7B\"query\":\"${__value.raw}\"%7D%5D"
}
]
}
]
},
{
"matcher": {
"id": "byName",
"options": "start_time"
},
"properties": [
{
"id": "custom.width",
"value": 182
}
]
},
{
"matcher": {
"id": "byName",
"options": "Trace ID"
},
"properties": [
{
"id": "custom.width",
"value": 94
}
]
},
{
"matcher": {
"id": "byName",
"options": "Service"
},
"properties": [
{
"id": "links",
"value": [
{
"title": "Show service overview",
"url": "/d/YWfN6wL7z/?var-service=${__value.raw}"
}
]
}
]
}
]
},
"gridPos": {
"h": 12,
"w": 11,
"x": 13,
"y": 0
},
"id": 4,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [ ]
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n replace(trace_id::text, '-'::text, ''::text) as \"Trace ID\",\n service_name as \"Service\",\n span_name as \"Operation\",\n start_time as \"Time\",\n duration_ms as \"Duration\" \nFROM ps_trace.span\nWHERE start_time > NOW() - INTERVAL '30m'\nAND parent_span_id is null\nORDER BY duration_ms DESC\nLIMIT 50\n;",
"refId": "A",
"select": [
[
{
"params": [
"span_duration_ms"
],
"type": "column"
}
]
],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Slowest Requests (last 30 minutes)",
"type": "table"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"displayMode": "auto"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Service"
},
"properties": [
{
"id": "links",
"value": [
{
"title": "Show service overview",
"url": "/d/YWfN6wL7z/?var-service=${__value.raw}"
}
]
}
]
}
]
},
"gridPos": {
"h": 12,
"w": 11,
"x": 13,
"y": 12
},
"id": 5,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [ ]
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n status_message as \"Error\",\n service_name as \"Service\",\n count(*) as \"Occurrences\" \nFROM ps_trace.span\nWHERE start_time > NOW() - INTERVAL '30m'\nAND status_code = 'STATUS_CODE_ERROR'\nGROUP BY 1, 2\nORDER BY 3\n;",
"refId": "A",
"select": [
[
{
"params": [
"span_duration_ms"
],
"type": "column"
}
]
],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Most Common Errors (last 30 minutes)",
"type": "table"
}
],
"schemaVersion": 34,
"style": "dark",
"tags": [
"promscale",
"apm"
],
"templating": {
"list": [ ]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"hidden": true
},
"timezone": "",
"title": "[1] Overview",
"uid": "vBhEewLnk",
"version": 35,
"weekStart": ""
}

View file

@ -0,0 +1,202 @@
{
"__inputs": [
{
"description": "",
"label": "TimescaleDB / PostgreSQL data source",
"name": "DS_TIMESCALEDB",
"pluginId": "postgres",
"pluginName": "PostgreSQL",
"type": "datasource"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [ ],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 13,
"iteration": 1647423383157,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"promscale",
"apm"
],
"targetBlank": false,
"title": "Menu",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"description": "A.K.A. \"Who do I call?\"",
"gridPos": {
"h": 20,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "WITH RECURSIVE x AS\n(\n SELECT\n trace_id,\n span_id,\n parent_span_id,\n service_name,\n span_name\n FROM ps_trace.span\n WHERE start_time > NOW() - INTERVAL '10 minutes'\n AND service_name = '${service}'\n AND span_name = '${operation}'\n UNION ALL\n SELECT\n s.trace_id,\n s.span_id,\n s.parent_span_id,\n s.service_name,\n s.span_name\n FROM x\n INNER JOIN ps_trace.span s\n ON (x.trace_id = s.trace_id\n AND x.span_id = s.parent_span_id)\n AND s.start_time > NOW() - INTERVAL '10 minutes'\n)\nSELECT\n md5(service_name || '-' || span_name) as id,\n span_name as title,\n service_name as \"subTitle\",\n count(*) as \"mainStat\"\nFROM x\nGROUP BY service_name, span_name",
"refId": "A",
"select": [
[
{
"params": [
"span_duration_ms"
],
"type": "column"
}
]
],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": "WITH RECURSIVE x AS\n(\n SELECT\n trace_id,\n span_id,\n parent_span_id,\n service_name,\n span_name,\n null::text as id,\n null::text as source,\n null::text as target\n FROM ps_trace.span\n WHERE start_time > NOW() - INTERVAL '10 minutes'\n AND service_name = '${service}'\n AND span_name = '${operation}'\n UNION ALL\n SELECT\n s.trace_id,\n s.span_id,\n s.parent_span_id,\n s.service_name,\n s.span_name,\n md5(s.service_name || '-' || s.span_name || '-' || x.service_name || '-' || x.span_name) as id,\n md5(x.service_name || '-' || x.span_name) as source,\n md5(s.service_name || '-' || s.span_name) as target\n FROM x\n INNER JOIN ps_trace.span s\n ON (x.trace_id = s.trace_id\n AND x.span_id = s.parent_span_id)\n AND s.start_time > NOW() - INTERVAL '10 minutes'\n)\nSELECT DISTINCT\n x.id,\n x.source,\n x.target \nFROM x\nWHERE id is not null",
"refId": "B",
"select": [
[
{
"params": [
"span_duration_ms"
],
"type": "column"
}
]
],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Map of Downstream Dependencies (last 10 minutes)",
"transformations": [ ],
"type": "nodeGraph"
}
],
"schemaVersion": 34,
"style": "dark",
"tags": [
"promscale",
"apm"
],
"templating": {
"list": [
{
"allValue": "ALL",
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"definition": "SELECT DISTINCT service_name FROM ps_trace.span WHERE start_time > NOW() - INTERVAL '10 minutes'\n",
"hide": 0,
"includeAll": false,
"label": "Service",
"multi": false,
"name": "service",
"options": [ ],
"query": "SELECT DISTINCT service_name FROM ps_trace.span WHERE start_time > NOW() - INTERVAL '10 minutes'\n",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"allValue": "ALL",
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"definition": "SELECT DISTINCT span_name FROM ps_trace.span WHERE service_name = ${service:sqlstring} AND start_time > NOW() - INTERVAL '10 minutes'\n",
"hide": 0,
"includeAll": false,
"label": "Operation",
"multi": false,
"name": "operation",
"options": [ ],
"query": "SELECT DISTINCT span_name FROM ps_trace.span WHERE service_name = ${service:sqlstring} AND start_time > NOW() - INTERVAL '10 minutes'\n",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"hidden": true
},
"timezone": "",
"title": "[4] Downstream Dependencies",
"uid": "SDJmJvPnz",
"version": 7,
"weekStart": ""
}

View file

@ -0,0 +1,203 @@
{
"__inputs": [
{
"description": "",
"label": "TimescaleDB / PostgreSQL data source",
"name": "DS_TIMESCALEDB",
"pluginId": "postgres",
"pluginName": "PostgreSQL",
"type": "datasource"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [ ],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Map of upstream service dependencies for a specific service and operation",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 12,
"iteration": 1647519937731,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"promscale",
"apm"
],
"targetBlank": false,
"title": "Menu",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"description": "A.K.A. \"Who called me?\"",
"gridPos": {
"h": 26,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "WITH RECURSIVE x AS\n(\n SELECT\n trace_id,\n span_id,\n parent_span_id,\n service_name,\n span_name\n FROM ps_trace.span\n WHERE start_time > NOW() - INTERVAL '10 minutes'\n AND service_name = '${service}'\n AND span_name = '${operation}'\n UNION ALL\n SELECT\n s.trace_id,\n s.span_id,\n s.parent_span_id,\n s.service_name,\n s.span_name\n FROM x\n INNER JOIN ps_trace.span s\n ON (x.trace_id = s.trace_id\n AND x.parent_span_id = s.span_id)\n AND s.start_time > NOW() - INTERVAL '10 minutes'\n)\nSELECT\n md5(service_name || '-' || span_name) as id,\n span_name as title,\n service_name as \"subTitle\",\n count(*) as \"mainStat\"\nFROM x\nGROUP BY service_name, span_name",
"refId": "A",
"select": [
[
{
"params": [
"span_duration_ms"
],
"type": "column"
}
]
],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": "WITH RECURSIVE x AS\n(\n SELECT\n trace_id,\n span_id,\n parent_span_id,\n service_name,\n span_name,\n null::text as id,\n null::text as target,\n null::text as source\n FROM ps_trace.span\n WHERE start_time > NOW() - INTERVAL '10 minutes'\n AND service_name = '${service}'\n AND span_name = '${operation}'\n UNION ALL\n SELECT\n s.trace_id,\n s.span_id,\n s.parent_span_id,\n s.service_name,\n s.span_name,\n md5(s.service_name || '-' || s.span_name || '-' || x.service_name || '-' || x.span_name) as id,\n md5(x.service_name || '-' || x.span_name) as target,\n md5(s.service_name || '-' || s.span_name) as source\n FROM x\n INNER JOIN ps_trace.span s\n ON (x.trace_id = s.trace_id\n AND x.parent_span_id = s.span_id)\n AND s.start_time > NOW() - INTERVAL '10 minutes'\n)\nSELECT DISTINCT\n x.id,\n x.target,\n x.source \nFROM x\nWHERE id is not null",
"refId": "B",
"select": [
[
{
"params": [
"span_duration_ms"
],
"type": "column"
}
]
],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Map of Upstream Dependencies (last 10 minutes)",
"transformations": [ ],
"type": "nodeGraph"
}
],
"schemaVersion": 34,
"style": "dark",
"tags": [
"promscale",
"apm"
],
"templating": {
"list": [
{
"allValue": "ALL",
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"definition": "SELECT DISTINCT service_name FROM ps_trace.span WHERE start_time > NOW() - INTERVAL '10 minutes'\n",
"hide": 0,
"includeAll": false,
"label": "Service",
"multi": false,
"name": "service",
"options": [ ],
"query": "SELECT DISTINCT service_name FROM ps_trace.span WHERE start_time > NOW() - INTERVAL '10 minutes'\n",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"allValue": "ALL",
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"definition": "SELECT DISTINCT span_name FROM ps_trace.span WHERE service_name = ${service:sqlstring} AND start_time > NOW() - INTERVAL '10 minutes'\n",
"hide": 0,
"includeAll": false,
"label": "Operation",
"multi": false,
"name": "operation",
"options": [ ],
"query": "SELECT DISTINCT span_name FROM ps_trace.span WHERE service_name = ${service:sqlstring} AND start_time > NOW() - INTERVAL '10 minutes'\n",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"hidden": true
},
"timezone": "",
"title": "[5] Upstream Dependencies",
"uid": "o4PPTDPnz",
"version": 14,
"weekStart": ""
}

View file

@ -0,0 +1,751 @@
{
"__inputs": [
{
"description": "",
"label": "TimescaleDB / PostgreSQL data source",
"name": "DS_TIMESCALEDB",
"pluginId": "postgres",
"pluginName": "PostgreSQL",
"type": "datasource"
},
{
"description": "",
"label": "Promscale Jaeger Tracing data source",
"name": "DS_PROMSCALE_JAEGER",
"pluginId": "jaeger",
"pluginName": "Jaeger",
"type": "datasource"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [ ],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"id": 9,
"iteration": 1647523274899,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"promscale",
"apm"
],
"targetBlank": false,
"title": "Menu",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": [ ]
},
"gridPos": {
"h": 11,
"w": 8,
"x": 0,
"y": 0
},
"id": 2,
"interval": "1s",
"maxDataPoints": 300,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "time_series",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n time_bucket_gapfill('$__interval', start_time) AS time,\n coalesce(count(*)::numeric / (EXTRACT(epoch FROM '$__interval'::interval)), 0) AS \"Requests\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"refId": "A",
"select": [ ],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Requests",
"type": "timeseries"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ms"
},
"overrides": [ ]
},
"gridPos": {
"h": 11,
"w": 8,
"x": 8,
"y": 0
},
"id": 3,
"interval": "1s",
"maxDataPoints": 300,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "time_series",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n time_bucket_gapfill('$__interval', start_time) AS time,\n COALESCE(ROUND(approx_percentile(0.99, percentile_agg(duration_ms))::numeric, 3), 0) as \"p99\",\n COALESCE(ROUND(approx_percentile(0.90, percentile_agg(duration_ms))::numeric, 3), 0) as \"p90\",\n COALESCE(ROUND(approx_percentile(0.50, percentile_agg(duration_ms))::numeric, 3), 0) as \"p50\",\n COALESCE(AVG(duration_ms), 0) as \"Average\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"refId": "A",
"select": [ ],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Duration",
"type": "timeseries"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 1,
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "percentunit"
},
"overrides": [ ]
},
"gridPos": {
"h": 11,
"w": 8,
"x": 16,
"y": 0
},
"id": 4,
"interval": "1s",
"maxDataPoints": 300,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "time_series",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n time_bucket('$__interval', start_time) as time,\n coalesce(count(*) filter (where status_code = 'STATUS_CODE_ERROR')::numeric / count(*), 0) as \"Error rate\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"refId": "A",
"select": [ ],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Error Rate",
"type": "timeseries"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"displayMode": "auto"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Requests"
},
"properties": [
{
"id": "unit",
"value": "reqps"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Avg Duration"
},
"properties": [
{
"id": "unit",
"value": "ms"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Error rate"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
}
]
},
"gridPos": {
"h": 12,
"w": 8,
"x": 0,
"y": 11
},
"id": 10,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [ ]
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n span_name as \"Operation\",\n count(*)::numeric / (${__to:date:seconds} - ${__from:date:seconds}) AS \"Requests\",\n sum(duration_ms) / count(*)::numeric as \"Avg Duration\",\n coalesce((count(*) filter (where status_code = 'STATUS_CODE_ERROR')::numeric / count(*)), 0) as \"Error rate\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"refId": "A",
"select": [ ],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Statistics by Operation",
"type": "table"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"displayMode": "auto"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Duration"
},
"properties": [
{
"id": "unit",
"value": "ms"
},
{
"id": "decimals",
"value": 2
},
{
"id": "custom.width",
"value": 143
}
]
},
{
"matcher": {
"id": "byName",
"options": "Trace ID"
},
"properties": [
{
"id": "custom.width",
"value": 282
},
{
"id": "links",
"value": [
{
"targetBlank": true,
"title": "View trace details",
"url": "/explore?left=%5B%22${__from}%22,%22${__to}%22,%22${DS_PROMSCALE_JAEGER}%22,%7B\"query\":\"${__value.raw}\"%7D%5D"
}
]
}
]
},
{
"matcher": {
"id": "byName",
"options": "start_time"
},
"properties": [
{
"id": "custom.width",
"value": 182
}
]
},
{
"matcher": {
"id": "byName",
"options": "Trace ID"
},
"properties": [
{
"id": "custom.width",
"value": 94
}
]
}
]
},
"gridPos": {
"h": 12,
"w": 8,
"x": 8,
"y": 11
},
"id": 7,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [ ]
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n replace(trace_id::text, '-'::text, ''::text) as \"Trace ID\",\n span_name as \"Operation\",\n start_time as \"Time\",\n duration_ms as \"Duration\"\nFROM ps_trace.span\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nORDER BY duration_ms DESC\nLIMIT 50\n;",
"refId": "A",
"select": [ ],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Slowest Operation Executions",
"type": "table"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"displayMode": "auto"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 12,
"w": 8,
"x": 16,
"y": 11
},
"id": 9,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [ ]
},
"pluginVersion": "8.3.3",
"targets": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"format": "table",
"group": [ ],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n status_message as \"Error\",\n count(*) as \"Occurrences\"\nFROM ps_trace.span\nWHERE $__timeFilter(start_time) AND\nstatus_code = 'STATUS_CODE_ERROR' AND\nservice_name = '${service}'\nGROUP BY 1\nORDER BY 2 DESC\n;",
"refId": "A",
"select": [ ],
"table": "event",
"timeColumn": "\"time\"",
"timeColumnType": "timestamp",
"where": [
{
"name": "$__timeFilter",
"params": [ ],
"type": "macro"
}
]
}
],
"title": "Most Common Errors",
"type": "table"
}
],
"refresh": "",
"schemaVersion": 34,
"style": "dark",
"tags": [
"promscale",
"apm"
],
"templating": {
"list": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_TIMESCALEDB}"
},
"definition": "SELECT \n distinct(service_name)\nFROM ps_trace.span\nWHERE $__timeFilter(start_time)\n",
"hide": 0,
"includeAll": false,
"label": "Service",
"multi": false,
"name": "service",
"options": [ ],
"query": "SELECT \n distinct(service_name)\nFROM ps_trace.span\nWHERE $__timeFilter(start_time)\n",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": { },
"timezone": "",
"title": "[2] Service Details",
"uid": "YWfN6wL7z",
"version": 36,
"weekStart": ""
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -463,7 +463,7 @@
"multi": false, "multi": false,
"name": "job", "name": "job",
"options": [ ], "options": [ ],
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)", "query": "label_values(up{job=~\".*thanos-bucket-replicate.*\"}, job)",
"refresh": 1, "refresh": 1,
"regex": "", "regex": "",
"sort": 2, "sort": 2,

View file

@ -1771,7 +1771,7 @@
"multi": false, "multi": false,
"name": "job", "name": "job",
"options": [ ], "options": [ ],
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)", "query": "label_values(up{job=~\".*thanos-compact.*\"}, job)",
"refresh": 1, "refresh": 1,
"regex": "", "regex": "",
"sort": 2, "sort": 2,

View file

@ -2142,29 +2142,6 @@
"query": "5m,10m,30m,1h,6h,12h", "query": "5m,10m,30m,1h,6h,12h",
"refresh": 2, "refresh": 2,
"type": "interval" "type": "interval"
},
{
"allValue": null,
"current": {
"text": "all",
"value": "$__all"
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": false,
"name": "job",
"options": [ ],
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)",
"refresh": 1,
"regex": "",
"sort": 2,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
} }
] ]
}, },

View file

@ -1069,7 +1069,7 @@
"multi": false, "multi": false,
"name": "job", "name": "job",
"options": [ ], "options": [ ],
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)", "query": "label_values(up{job=~\".*thanos-query-frontend.*\"}, job)",
"refresh": 1, "refresh": 1,
"regex": "", "regex": "",
"sort": 2, "sort": 2,

View file

@ -1830,7 +1830,7 @@
"multi": false, "multi": false,
"name": "job", "name": "job",
"options": [ ], "options": [ ],
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)", "query": "label_values(up{job=~\".*thanos-query.*\"}, job)",
"refresh": 1, "refresh": 1,
"regex": "", "regex": "",
"sort": 2, "sort": 2,

View file

@ -2174,7 +2174,7 @@
"multi": false, "multi": false,
"name": "job", "name": "job",
"options": [ ], "options": [ ],
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)", "query": "label_values(up{job=~\".*thanos-receive.*\"}, job)",
"refresh": 1, "refresh": 1,
"regex": "", "regex": "",
"sort": 2, "sort": 2,

View file

@ -1822,7 +1822,7 @@
"multi": false, "multi": false,
"name": "job", "name": "job",
"options": [ ], "options": [ ],
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)", "query": "label_values(up{job=~\".*thanos-rule.*\"}, job)",
"refresh": 1, "refresh": 1,
"regex": "", "regex": "",
"sort": 2, "sort": 2,

View file

@ -1459,7 +1459,7 @@
"multi": false, "multi": false,
"name": "job", "name": "job",
"options": [ ], "options": [ ],
"query": "label_values(up{job=~\".*thanos-store.*\"}, job)", "query": "label_values(up{job=~\".*thanos-sidecar.*\"}, job)",
"refresh": 1, "refresh": 1,
"regex": "", "regex": "",
"sort": 2, "sort": 2,

View file

@ -1,9 +1,23 @@
module github.com/monitoring-mixins/website/hack module github.com/monitoring-mixins/website/hack
go 1.14 go 1.17
require ( require (
github.com/brancz/gojsontoyaml v0.0.0-20191212081931-bf2969bbd742 github.com/brancz/gojsontoyaml v0.0.0-20191212081931-bf2969bbd742
github.com/google/go-jsonnet v0.16.1-0.20200703153429-aaf50f5b655f github.com/google/go-jsonnet v0.18.0
github.com/jsonnet-bundler/jsonnet-bundler v0.4.0 github.com/jsonnet-bundler/jsonnet-bundler v0.4.0
) )
require (
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect
github.com/fatih/color v1.10.0 // indirect
github.com/ghodss/yaml v1.0.0 // indirect
github.com/mattn/go-colorable v0.1.8 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/pkg/errors v0.8.0 // indirect
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae // indirect
gopkg.in/alecthomas/kingpin.v2 v2.2.6 // indirect
gopkg.in/yaml.v2 v2.2.7 // indirect
sigs.k8s.io/yaml v1.1.0 // indirect
)

View file

@ -11,10 +11,14 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s= github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg=
github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/google/go-jsonnet v0.16.1-0.20200703153429-aaf50f5b655f h1:mw4KoMG5/DXLPhpKXQRYTEIZFkFo0a1HU2R1HbeYpek= github.com/google/go-jsonnet v0.16.1-0.20200703153429-aaf50f5b655f h1:mw4KoMG5/DXLPhpKXQRYTEIZFkFo0a1HU2R1HbeYpek=
github.com/google/go-jsonnet v0.16.1-0.20200703153429-aaf50f5b655f/go.mod h1:sOcuej3UW1vpPTZOr8L7RQimqai1a57bt5j22LzGZCw= github.com/google/go-jsonnet v0.16.1-0.20200703153429-aaf50f5b655f/go.mod h1:sOcuej3UW1vpPTZOr8L7RQimqai1a57bt5j22LzGZCw=
github.com/google/go-jsonnet v0.18.0 h1:/6pTy6g+Jh1a1I2UMoAODkqELFiVIdOxbNwv0DDzoOg=
github.com/google/go-jsonnet v0.18.0/go.mod h1:C3fTzyVJDslXdiTqw/bTFk7vSGyCtH3MGRbDfvEwGd0=
github.com/jsonnet-bundler/jsonnet-bundler v0.4.0 h1:4BKZ6LDqPc2wJDmaKnmYD/vDjUptJtnUpai802MibFc= github.com/jsonnet-bundler/jsonnet-bundler v0.4.0 h1:4BKZ6LDqPc2wJDmaKnmYD/vDjUptJtnUpai802MibFc=
github.com/jsonnet-bundler/jsonnet-bundler v0.4.0/go.mod h1:/by7P/OoohkI3q4CgSFqcoFsVY+IaNbzOVDknEsKDeU= github.com/jsonnet-bundler/jsonnet-bundler v0.4.0/go.mod h1:/by7P/OoohkI3q4CgSFqcoFsVY+IaNbzOVDknEsKDeU=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
@ -25,10 +29,14 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA= github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA=
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8=
github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-isatty v0.0.6/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.6/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGeM= github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGeM=
github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=
github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw= github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@ -43,6 +51,9 @@ golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190310054646-10058d7d4faa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190310054646-10058d7d4faa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037 h1:YyJpGZS1sBuBCzLAR1VEpK193GlqGZbnPFnPV/5Rsb4= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037 h1:YyJpGZS1sBuBCzLAR1VEpK193GlqGZbnPFnPV/5Rsb4=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae h1:/WDfKMnPU+m5M4xB+6x4kaepxRw6jWvR5iDRdvjHgy8=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@ -52,3 +63,7 @@ gopkg.in/yaml.v2 v2.1.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.7 h1:VUgggvou5XRW9mHwD/yXxIYSMtY0zoKQf/v226p2nyo=
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs=
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=

View file

@ -114,6 +114,11 @@
"name": "promtail", "name": "promtail",
"source": "https://github.com/grafana/loki", "source": "https://github.com/grafana/loki",
"subdir": "production/promtail-mixin" "subdir": "production/promtail-mixin"
},
{
"name": "promscale",
"source": "https://github.com/timescale/promscale",
"subdir": "docs/mixin"
} }
] ]
} }

View file

@ -0,0 +1,432 @@
---
title: promscale
---
## Overview
{{< panel style="danger" >}}
Jsonnet source code is available at [github.com/timescale/promscale](https://github.com/timescale/promscale/tree/master/docs/mixin)
{{< /panel >}}
## Alerts
{{< panel style="warning" >}}
The complete list of pregenerated alerts is available [here](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/alerts.yaml).
{{< /panel >}}
### promscale-general
##### PromscaleDown
{{< code lang="yaml" >}}
alert: PromscaleDown
annotations:
description: No Promscale instance was found.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleDown.md
summary: Promscale is down
expr: absent(up{job=~".*promscale.*"})
labels:
severity: critical
{{< /code >}}
### promscale-ingest
##### PromscaleIngestHighErrorRate
{{< code lang="yaml" >}}
alert: PromscaleIngestHighErrorRate
annotations:
description: Promscale ingestion is having a {{ $value | humanizePercentage }} error
rate.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighErrorRate.md
summary: High error rate in Promscale ingestion
expr: |
(
sum by (job, instance, type) (
rate(promscale_ingest_requests_total{code=~"5.."}[5m])
)
/
sum by (job, instance, type) (
rate(promscale_ingest_requests_total[5m])
)
) > 0.05
labels:
severity: warning
{{< /code >}}
##### PromscaleIngestHighErrorRate
{{< code lang="yaml" >}}
alert: PromscaleIngestHighErrorRate
annotations:
description: Promscale ingestion is having a {{ $value | humanizePercentage }} error
rate.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighErrorRate.md
summary: High error rate in Promscale ingestion
expr: |
(
sum by (job, instance, type) (
rate(promscale_ingest_requests_total{code=~"5.."}[5m])
)
/
sum by (job, instance, type) (
rate(promscale_ingest_requests_total[5m])
)
) > 0.1
labels:
severity: critical
{{< /code >}}
##### PromscaleIngestHighLatency
{{< code lang="yaml" >}}
alert: PromscaleIngestHighLatency
annotations:
description: Slowest 10% of ingestion batch took more than {{ $value }} seconds
to ingest.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighLatency.md
summary: Slow Promscale ingestion
expr: |
(
histogram_quantile(
0.90,
sum by (job, instance, type, le) (
rate(promscale_ingest_duration_seconds_bucket[5m])
)
) > 10
and
sum by (job, instance, type) (
rate(promscale_ingest_duration_seconds_bucket[5m])
)
) > 0
for: 5m
labels:
severity: warning
{{< /code >}}
##### PromscaleIngestHighLatency
{{< code lang="yaml" >}}
alert: PromscaleIngestHighLatency
annotations:
description: Slowest 10% of ingestion batch took more than {{ $value }} seconds
to ingest.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleIngestHighLatency.md
summary: Slow Promscale ingestion
expr: |
(
histogram_quantile(
0.90,
sum by (job, instance, type, le) (
rate(promscale_ingest_duration_seconds_bucket[5m])
)
) > 30
and
sum by (job, instance, type) (
rate(promscale_ingest_duration_seconds_bucket[5m])
)
) > 0
for: 5m
labels:
severity: critical
{{< /code >}}
### promscale-query
##### PromscaleQueryHighErrorRate
{{< code lang="yaml" >}}
alert: PromscaleQueryHighErrorRate
annotations:
description: Evaluating queries via Promscale has {{ $value | humanizePercentage
}} error rate.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighErrorRate.md
summary: High error rate in querying Promscale
expr: |
(
sum by (job, instance, type) (
rate(promscale_query_requests_total{code=~"5.."}[5m])
)
/
sum by (job, instance, type) (
rate(promscale_query_requests_total[5m])
)
) > 0.05
labels:
severity: warning
{{< /code >}}
##### PromscaleQueryHighErrorRate
{{< code lang="yaml" >}}
alert: PromscaleQueryHighErrorRate
annotations:
description: Evaluating queries via Promscale has {{ $value | humanizePercentage
}} error rate.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighErrorRate.md
summary: High error rate in querying Promscale
expr: |
(
sum by (job, instance, type) (
rate(promscale_query_requests_total{code=~"5.."}[5m])
)
/
sum by (job, instance, type) (
rate(promscale_query_requests_total[5m])
)
) > 0.1
labels:
severity: critical
{{< /code >}}
##### PromscaleQueryHighLatency
{{< code lang="yaml" >}}
alert: PromscaleQueryHighLatency
annotations:
description: Slowest 10% of the queries took more than {{ $value }} seconds to evaluate.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighLatency.md
summary: Slow Promscale querying
expr: |
(
histogram_quantile(
0.90,
sum by (job, instance, type, le) (
rate(promscale_query_duration_seconds_bucket[5m])
)
) > 5
and
sum by (job, instance, type) (
rate(promscale_query_duration_seconds_bucket[5m])
) > 0
)
for: 5m
labels:
severity: warning
{{< /code >}}
##### PromscaleQueryHighLatency
{{< code lang="yaml" >}}
alert: PromscaleQueryHighLatency
annotations:
description: Slowest 10% of the queries took more than {{ $value }} seconds to evaluate.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleQueryHighLatency.md
summary: Slow Promscale querying
expr: |
(
histogram_quantile(
0.90,
sum by (job, instance, type, le) (
rate(promscale_query_duration_seconds_bucket[5m])
)
) > 10
and
sum by (job, instance, type) (
rate(promscale_query_duration_seconds_bucket[5m])
) > 0
)
for: 5m
labels:
severity: critical
{{< /code >}}
### promscale-cache
##### PromscaleCacheHighNumberOfEvictions
{{< code lang="yaml" >}}
alert: PromscaleCacheHighNumberOfEvictions
annotations:
description: Promscale {{ $labels.name }} is evicting {{ $value | humanizePercentage }}
of its capacity per second.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCacheHighNumberOfEvictions.md
summary: High cache eviction in Promscale
expr: |
(
sum by (job, instance, name, type) (
rate(promscale_cache_evictions_total[5m])
)
/
sum by (job, instance, name, type) (
promscale_cache_capacity_elements
)
) > 0.2
labels:
severity: warning
{{< /code >}}
##### PromscaleCacheTooSmall
{{< code lang="yaml" >}}
alert: PromscaleCacheTooSmall
annotations:
description: Promscale {{ $labels.name }} has a hit ratio of {{ $value | humanizePercentage
}}.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCacheTooSmall.md
summary: Low cache hit ratio in Promscale
expr: |
(
sum by (job, instance, type, name) (
rate(promscale_cache_query_hits_total[5m])
)
/
sum by (job, instance, type, name) (
rate(promscale_cache_queries_total[5m])
)
) < 0.9
labels:
severity: warning
{{< /code >}}
### promscale-database-connection
##### PromscaleStorageHighErrorRate
{{< code lang="yaml" >}}
alert: PromscaleStorageHighErrorRate
annotations:
description: Promscale connection with the database has an error rate of {{ $value |
humanizePercentage }}.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageHighErrorRate.md
summary: Promscale experiences a high error rate when connecting to the database
expr: |
(
sum by (job) (
# Error counter exists for query, query_row & exec, and not for send_batch.
rate(promscale_database_request_errors_total{method=~"query.*|exec"}[5m])
)
/
sum by (job) (
rate(promscale_database_requests_total{method=~"query.*|exec"}[5m])
)
) > 0.05
labels:
severity: warning
{{< /code >}}
##### PromscaleStorageHighLatency
{{< code lang="yaml" >}}
alert: PromscaleStorageHighLatency
annotations:
description: Slowest 10% of database requests are taking more than {{ $value }}
seconds to respond.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageHighLatency.md
summary: Slow database response
expr: |
(
histogram_quantile(0.9,
sum by (le, job, type) (
rate(promscale_database_requests_duration_seconds_bucket[5m])
)
) > 5
and
sum by (job, type) (
rate(promscale_database_requests_duration_seconds_count[5m])
) > 0
)
labels:
severity: warning
{{< /code >}}
### promscale-database
##### PromscaleStorageUnhealthy
{{< code lang="yaml" >}}
alert: PromscaleStorageUnhealthy
annotations:
description: Promscale database health checks are failing at a rate of {{ $value |
humanizePercentage }}.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleStorageUnhealthy.md
summary: Promscale database is unhealthy
expr: |
(
sum by (job) (
rate(promscale_sql_database_health_check_errors_total[5m])
)
/
sum by (job) (
rate(promscale_sql_database_health_check_total[5m])
)
) > 0.05
labels:
severity: warning
{{< /code >}}
##### PromscaleMaintenanceJobRunningTooLong
{{< code lang="yaml" >}}
alert: PromscaleMaintenanceJobRunningTooLong
annotations:
description: Promscale maintenance job has been running for {{ $value }} seconds,
which is longer than the one-hour limit.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobRunningTooLong.md
summary: Promscale maintenance jobs taking too long to complete
expr: |
(
(
(
time()
-
promscale_sql_database_worker_maintenance_job_start_timestamp_seconds
)
>
30 * 60 * 2 # 30 mins (we launch maintenance jobs scheduled at 30 mins) * 60 (to seconds) * 2 (wait max for 2 complete scans before firing alert).
)
and
promscale_sql_database_worker_maintenance_job_start_timestamp_seconds > 0
)
labels:
severity: warning
{{< /code >}}
##### PromscaleMaintenanceJobFailures
{{< code lang="yaml" >}}
alert: PromscaleMaintenanceJobFailures
annotations:
description: Promscale maintenance job failed to successfully execute.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobFailures.md
summary: Promscale maintenance job failed
expr: promscale_sql_database_worker_maintenance_job_failed == 1
labels:
severity: warning
{{< /code >}}
##### PromscaleCompressionLow
{{< code lang="yaml" >}}
alert: PromscaleCompressionLow
annotations:
description: High uncompressed data in Promscale, on average, {{ $value }} uncompressed
chunks per metric.
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleCompressionLow.md
summary: High uncompressed data
expr: |
(
(
(promscale_sql_database_chunks_count - promscale_sql_database_chunks_compressed_count) # Number of uncompressed chunks.
/
promscale_sql_database_metric_count
) > 4 # If total number of average uncompressed chunk per metric is more than 4 chunks at maximum, we should alert.
and
promscale_sql_database_compression_status == 1
)
labels:
severity: warning
{{< /code >}}
## Dashboards
The following dashboards are generated from mixins and hosted on GitHub:
- [apm-dependencies](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/apm-dependencies.json)
- [apm-home](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/apm-home.json)
- [apm-service-dependencies-downstream](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/apm-service-dependencies-downstream.json)
- [apm-service-dependencies-upstream](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/apm-service-dependencies-upstream.json)
- [apm-service-overview](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/apm-service-overview.json)
- [promscale](https://github.com/monitoring-mixins/website/blob/master/assets/promscale/dashboards/promscale.json)

View file

@ -114,6 +114,11 @@
"name": "promtail", "name": "promtail",
"source": "https://github.com/grafana/loki", "source": "https://github.com/grafana/loki",
"subdir": "production/promtail-mixin" "subdir": "production/promtail-mixin"
},
{
"name": "promscale",
"source": "https://github.com/timescale/promscale",
"subdir": "docs/mixin"
} }
] ]
} }