mirror of
https://github.com/monitoring-mixins/website.git
synced 2024-12-14 11:37:31 +00:00
Merge pull request #41 from v-zhuravlev/jsonnet-ilbs
Add jsonnet-libs mixins
This commit is contained in:
commit
25d0e39bb9
444 changed files with 308117 additions and 23994 deletions
53
assets/MSSQL/alerts.yaml
Normal file
53
assets/MSSQL/alerts.yaml
Normal file
|
@ -0,0 +1,53 @@
|
|||
---
# Prometheus alerting rules for Microsoft SQL Server (MSSQL).
#
# Covers two signals:
#   * deadlocks      - growth of mssql_deadlocks_total over a 5-minute window
#   * IO stall time  - growth of mssql_io_stall_seconds_total per operation
#                      (read / write), converted to milliseconds
#
# Every rule must hold for 5 minutes (`for: 5m`) before it fires.
# NOTE: the "Moderate" stall rules have no upper bound, so they keep firing
# alongside the matching "High" rule once the 400ms threshold is crossed.
groups:
  - name: MSSQLAlerts
    rules:
      # More than 10 deadlocks recorded within the last 5 minutes.
      - alert: MSSQLHighNumberOfDeadlocks
        annotations:
          description: '{{ printf "%.2f" $value }} deadlocks have occurred over the
            last 5 minutes on {{$labels.instance}}, which is above threshold of 10
            deadlocks.'
          summary: There are deadlocks occurring in the database.
        expr: |
          increase(mssql_deadlocks_total{}[5m]) > 10
        for: 5m
        labels:
          severity: warning

      # Read stall, warning level. The `1000 *` factor converts the increase of
      # the seconds counter into milliseconds to match the 200ms threshold.
      - alert: MSSQLModerateReadStallTime
        annotations:
          description: '{{ printf "%.2f" $value }}ms of IO read stall has occurred on
            {{$labels.instance}}, which is above threshold of 200ms.'
          summary: There is a moderate amount of IO stall for database reads.
        expr: |
          1000 * increase(mssql_io_stall_seconds_total{operation="read"}[5m]) > 200
        for: 5m
        labels:
          severity: warning

      # Read stall, critical level (threshold 400ms).
      - alert: MSSQLHighReadStallTime
        annotations:
          description: '{{ printf "%.2f" $value }}ms of IO read stall has occurred on
            {{$labels.instance}}, which is above threshold of 400ms.'
          summary: There is a high amount of IO stall for database reads.
        expr: |
          1000 * increase(mssql_io_stall_seconds_total{operation="read"}[5m]) > 400
        for: 5m
        labels:
          severity: critical

      # Write stall, warning level (threshold 200ms).
      - alert: MSSQLModerateWriteStallTime
        annotations:
          description: '{{ printf "%.2f" $value }}ms of IO write stall has occurred on
            {{$labels.instance}}, which is above threshold of 200ms.'
          summary: There is a moderate amount of IO stall for database writes.
        expr: |
          1000 * increase(mssql_io_stall_seconds_total{operation="write"}[5m]) > 200
        for: 5m
        labels:
          severity: warning

      # Write stall, critical level (threshold 400ms).
      - alert: MSSQLHighWriteStallTime
        annotations:
          description: '{{ printf "%.2f" $value }}ms of IO write stall has occurred on
            {{$labels.instance}}, which is above threshold of 400ms.'
          summary: There is a high amount of IO stall for database writes.
        expr: |
          1000 * increase(mssql_io_stall_seconds_total{operation="write"}[5m]) > 400
        for: 5m
        labels:
          severity: critical
|
1123
assets/MSSQL/dashboards/mssql-overview.json
Normal file
1123
assets/MSSQL/dashboards/mssql-overview.json
Normal file
File diff suppressed because it is too large
Load diff
515
assets/MSSQL/dashboards/mssql-pages.json
Normal file
515
assets/MSSQL/dashboards/mssql-pages.json
Normal file
|
@ -0,0 +1,515 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [ ],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"description": "An overview of MSSQL paging metrics.",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"mssql-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other MSSQL dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Memory used for the OS page file.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 50,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "mssql_os_page_file{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{state}}"
|
||||
}
|
||||
],
|
||||
"title": "Page file memory",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Percentage of pages found and read from the SQL Server buffer cache.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"max": 100,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "mssql_buffer_cache_hit_ratio{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Buffer cache hit percentage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Rate of page checkpoints per second.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "checkpoints/s"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "mssql_checkpoint_pages_sec{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Page checkpoints",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The number of page faults that were incurred by the SQL Server process.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "faults"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(mssql_page_fault_count_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Page faults",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"mssql-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(mssql_build_info{}, job)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "Cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(mssql_build_info{job=~\"$job\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"options": [ ],
|
||||
"query": "label_values(mssql_build_info{job=~\"$job\"}, instance)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "default",
|
||||
"title": "MSSQL pages",
|
||||
"uid": "mssql-pages",
|
||||
"version": 0
|
||||
}
|
1
assets/MSSQL/rules.yaml
Normal file
1
assets/MSSQL/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
95
assets/aerospike/alerts.yaml
Normal file
95
assets/aerospike/alerts.yaml
Normal file
|
@ -0,0 +1,95 @@
|
|||
---
# Prometheus alerting rules for Aerospike.
#
# All rules aggregate with `sum without(service)` so the `service` label does
# not split the series, and every rule must hold for 5 minutes before firing.
#
# Rule families:
#   * resource usage   - node memory and namespace disk usage (>= 80 percent)
#   * partition health - unavailable / dead partitions (> 0)
#   * write rejection  - stop_writes or clock_skew_stop_writes set (> 0)
#   * client errors    - read / write / UDF error percentage (> 25 percent)
groups:
  - name: aerospike
    rules:
      # Used memory is derived as 100 minus the reported free percentage.
      - alert: AerospikeNodeHighMemoryUsage
        annotations:
          description: '{{ printf "%.0f" $value }} percent of system memory used on
            node {{$labels.instance}} on cluster {{$labels.aerospike_cluster}}, which
            is above the threshold of 80.'
          summary: There is a limited amount of memory available for a node.
        expr: |
          100 - sum without (service) (aerospike_node_stats_system_free_mem_pct) >= 80
        for: 5m
        labels:
          severity: critical

      # Used disk is derived as 100 minus the reported free percentage, so the
      # description reports the value as space *used*, not space available.
      - alert: AerospikeNamespaceHighDiskUsage
        annotations:
          description: '{{ printf "%.0f" $value }} percent of disk space used for
            namespace {{$labels.ns}} on node {{$labels.instance}}, on cluster
            {{$labels.aerospike_cluster}}, which is above the threshold of 80.'
          summary: There is a limited amount of disk space available for a node.
        expr: |
          100 - sum without (service) (aerospike_namespace_device_free_pct) >= 80
        for: 5m
        labels:
          severity: critical

      - alert: AerospikeUnavailablePartitions
        annotations:
          description: '{{ printf "%.0f" $value }} unavailable partition(s) in
            namespace {{$labels.ns}}, on node {{$labels.instance}}, on cluster
            {{$labels.aerospike_cluster}}, which is above the threshold of 0.'
          summary: There are unavailable partitions in the Aerospike cluster.
        expr: |
          sum without(service) (aerospike_namespace_unavailable_partitions) > 0
        for: 5m
        labels:
          severity: critical

      - alert: AerospikeDeadPartitions
        annotations:
          description: '{{ printf "%.0f" $value }} dead partition(s) in namespace
            {{$labels.ns}}, on node {{$labels.instance}}, on cluster
            {{$labels.aerospike_cluster}}, which is above the threshold of 0.'
          summary: There are dead partitions in the Aerospike cluster.
        expr: |
          sum without(service) (aerospike_namespace_dead_partitions) > 0
        for: 5m
        labels:
          severity: critical

      # Fires when either stop-writes metric is non-zero for the namespace.
      - alert: AerospikeNamespaceRejectingWrites
        annotations:
          description: Namespace {{$labels.ns}} on node {{$labels.instance}} on cluster
            {{$labels.aerospike_cluster}} is currently rejecting all client-originated
            writes.
          summary: A namespace is currently rejecting all writes. Check for
            unavailable/dead partitions, clock skew, or nodes running out of
            memory/disk.
        expr: |
          sum without(service) (aerospike_namespace_stop_writes + aerospike_namespace_clock_skew_stop_writes) > 0
        for: 5m
        labels:
          severity: critical

      # Client error-rate rules (read / write / UDF).
      #
      # The error ratio is errors / (errors + successes). It is multiplied by
      # 100 so that both the `> 25` threshold and the `$value` shown in the
      # description are percentages; as a bare ratio the value can never
      # exceed 1, so the comparison against 25 could never be true.
      # clamp_min(..., 1) keeps the denominator at 1 or above, so the result
      # stays finite when there is no traffic.
      - alert: AerospikeHighClientReadErrorRate
        annotations:
          description: '{{ printf "%.0f" $value }} percent of client read transactions
            are resulting in errors for namespace {{$labels.ns}}, on node
            {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}}, which is
            above the threshold of 25.'
          summary: There is a high rate of errors for client read transactions.
        expr: |
          100 * sum without(service) (rate(aerospike_namespace_client_read_error[5m]))
          /
          clamp_min(
            sum without(service) (rate(aerospike_namespace_client_read_error[5m]))
            + sum without(service) (rate(aerospike_namespace_client_read_success[5m])),
            1
          )
          > 25
        for: 5m
        labels:
          severity: warning

      - alert: AerospikeHighClientWriteErrorRate
        annotations:
          description: '{{ printf "%.0f" $value }} percent of client write transactions
            are resulting in errors for namespace {{$labels.ns}}, on node
            {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}}, which is
            above the threshold of 25.'
          summary: There is a high rate of errors for client write transactions.
        expr: |
          100 * sum without(service) (rate(aerospike_namespace_client_write_error[5m]))
          /
          clamp_min(
            sum without(service) (rate(aerospike_namespace_client_write_error[5m]))
            + sum without(service) (rate(aerospike_namespace_client_write_success[5m])),
            1
          )
          > 25
        for: 5m
        labels:
          severity: warning

      - alert: AerospikeHighClientUDFErrorRate
        annotations:
          description: '{{ printf "%.0f" $value }} percent of client UDF transactions
            are resulting in errors for namespace {{$labels.ns}}, on node
            {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}}, which is
            above the threshold of 25.'
          summary: There is a high rate of errors for client UDF transactions.
        expr: |
          100 * sum without(service) (rate(aerospike_namespace_client_udf_error[5m]))
          /
          clamp_min(
            sum without(service) (rate(aerospike_namespace_client_udf_error[5m]))
            + sum without(service) (rate(aerospike_namespace_client_udf_complete[5m])),
            1
          )
          > 25
        for: 5m
        labels:
          severity: warning
|
1173
assets/aerospike/dashboards/aerospike-instance-overview.json
Normal file
1173
assets/aerospike/dashboards/aerospike-instance-overview.json
Normal file
File diff suppressed because it is too large
Load diff
325
assets/aerospike/dashboards/aerospike-logs.json
Normal file
325
assets/aerospike/dashboards/aerospike-logs.json
Normal file
|
@ -0,0 +1,325 @@
|
|||
{
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": false,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"aerospike-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Aerospike Dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Logs volume grouped by \"level\" label.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 50,
|
||||
"stacking": {
|
||||
"mode": "normal"
|
||||
}
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(E|e)(rr.*|RR.*)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "red",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(W|w)(arn.*|ARN.*|rn|RN)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "orange",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(T|t)(race|RACE)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "light-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "logs"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "text",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 24
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "30s",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "sum by (level) (count_over_time({job=~\"integrations/aerospike\",job=~\"$job\",aerospike_cluster=~\"$aerospike_cluster\",instance=~\"$instance\",context=~\"$context\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__interval]))\n",
|
||||
"legendFormat": "{{ level }}"
|
||||
}
|
||||
],
|
||||
"title": "Logs volume",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "renameByRegex",
|
||||
"options": {
|
||||
"regex": "Value",
|
||||
"renamePattern": "logs"
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 18,
|
||||
"w": 24
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"dedupStrategy": "exact",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": true,
|
||||
"showTime": false,
|
||||
"wrapLogMessage": true
|
||||
},
|
||||
"pluginVersion": "v10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "{job=~\"integrations/aerospike\",job=~\"$job\",aerospike_cluster=~\"$aerospike_cluster\",instance=~\"$instance\",context=~\"$context\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n"
|
||||
}
|
||||
],
|
||||
"title": "Logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"schemaVersion": 36,
|
||||
"tags": [
|
||||
"aerospike-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"label": "Loki data source",
|
||||
"name": "loki_datasource",
|
||||
"query": "loki",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"query": "label_values({job=~\"integrations/aerospike\"}, job)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Aerospike_cluster",
|
||||
"multi": true,
|
||||
"name": "aerospike_cluster",
|
||||
"query": "label_values({job=~\"integrations/aerospike\",job=~\"$job\"}, aerospike_cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"query": "label_values({job=~\"integrations/aerospike\",job=~\"$job\",aerospike_cluster=~\"$aerospike_cluster\"}, instance)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Context",
|
||||
"multi": true,
|
||||
"name": "context",
|
||||
"query": "label_values({job=~\"integrations/aerospike\",job=~\"$job\",aerospike_cluster=~\"$aerospike_cluster\",instance=~\"$instance\"}, context)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Level",
|
||||
"multi": true,
|
||||
"name": "level",
|
||||
"query": "label_values({job=~\"integrations/aerospike\",job=~\"$job\",aerospike_cluster=~\"$aerospike_cluster\",instance=~\"$instance\",context=~\"$context\"}, level)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"label": "Regex search",
|
||||
"name": "regex_search",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Aerospike logs",
|
||||
"uid": "aerospike-logs"
|
||||
}
|
967
assets/aerospike/dashboards/aerospike-namespace-overview.json
Normal file
967
assets/aerospike/dashboards/aerospike-namespace-overview.json
Normal file
|
@ -0,0 +1,967 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [ ],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"aerospike-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Aerospike Dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of unavailable data partitions in an Aerospike namespace.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"minVizHeight": 10,
|
||||
"minVizWidth": 0,
|
||||
"orientation": "vertical",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": true,
|
||||
"text": {
|
||||
"valueSize": 100
|
||||
},
|
||||
"valueMode": "color"
|
||||
},
|
||||
"pluginVersion": "10.2.0-59542pre",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "aerospike_namespace_unavailable_partitions{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}}"
|
||||
}
|
||||
],
|
||||
"title": "Unavailable partitions",
|
||||
"type": "bargauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Disk utilization in an Aerospike namespace.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "continuous-BlYlRd"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "scheme",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"max": 100,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 16,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "100 - aerospike_namespace_device_free_pct{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}}"
|
||||
}
|
||||
],
|
||||
"title": "Disk usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of dead data partitions in an Aerospike namespace.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"minVizHeight": 10,
|
||||
"minVizWidth": 0,
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": true,
|
||||
"text": {
|
||||
"valueSize": 100
|
||||
},
|
||||
"valueMode": "color"
|
||||
},
|
||||
"pluginVersion": "10.2.0-59542pre",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "aerospike_namespace_dead_partitions{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}}"
|
||||
}
|
||||
],
|
||||
"title": "Dead partitions",
|
||||
"type": "bargauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Memory utilization in an Aerospike namespace.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "continuous-BlYlRd"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "scheme",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"max": 100,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 16,
|
||||
"x": 8,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "100 - aerospike_namespace_memory_free_pct{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Rate of client read transactions in an Aerospike namespace organized by result.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "rps"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"min",
|
||||
"mean",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_read_success{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - success"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_read_error{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - error"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_read_filtered_out{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - filtered"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_read_timeout{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - timeout"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_read_not_found{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - not found"
|
||||
}
|
||||
],
|
||||
"title": "Client reads",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Rate of client write transactions in an Aerospike namespace organized by result.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "wps"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"min",
|
||||
"mean",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_write_success{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - success"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_write_error{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - error"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_write_filtered_out{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - filtered"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_write_timeout{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - timeout"
|
||||
}
|
||||
],
|
||||
"title": "Client writes",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Rate of client UDF transactions in an Aerospike namespace organized by result.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"min",
|
||||
"mean",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_udf_complete{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - complete"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_udf_error{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - error"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_udf_filtered_out{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - filtered"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_udf_timeout{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}} - timeout"
|
||||
}
|
||||
],
|
||||
"title": "Client UDF transactions",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Percentage of read transactions that are resolved by a cache hit in an Aerospike namespace.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"max": 100,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 24
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "aerospike_namespace_cache_read_pct{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{aerospike_cluster}} - {{ns}}"
|
||||
}
|
||||
],
|
||||
"title": "Cache read utilization",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"aerospike-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(aerospike_namespace_ns_cluster_size,job)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "Cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(aerospike_namespace_dead_partitions{job=~\"$job\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Aerospike cluster",
|
||||
"multi": true,
|
||||
"name": "aerospike_cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(aerospike_namespace_ns_cluster_size{job=~\"$job\"}, aerospike_cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Namespace",
|
||||
"multi": true,
|
||||
"name": "ns",
|
||||
"options": [ ],
|
||||
"query": "label_values(aerospike_namespace_xmem_id{job=~\"$job\"}, ns)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "default",
|
||||
"title": "Aerospike namespace overview",
|
||||
"uid": "aerospike-namespace-overview",
|
||||
"version": 0
|
||||
}
|
1493
assets/aerospike/dashboards/aerospike-overview.json
Normal file
1493
assets/aerospike/dashboards/aerospike-overview.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/aerospike/rules.yaml
Normal file
1
assets/aerospike/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
51
assets/apache-activemq/alerts.yaml
Normal file
51
assets/apache-activemq/alerts.yaml
Normal file
|
@ -0,0 +1,51 @@
|
|||
groups:
|
||||
- name: apache-activemq-alerts
|
||||
rules:
|
||||
- alert: ApacheActiveMQHighTopicMemoryUsage
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} percent of memory used by topics on
|
||||
{{$labels.instance}} in cluster {{$labels.activemq_cluster}}, which is above
|
||||
the threshold of 70 percent.'
|
||||
summary: Topic destination memory usage is high, which may result in a reduction
|
||||
of the rate at which producers send messages.
|
||||
expr: |
|
||||
sum without (destination) (activemq_topic_memory_percent_usage{destination!~"ActiveMQ.Advisory.*"}) > 70
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheActiveMQHighQueueMemoryUsage
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} percent of memory used by queues on
|
||||
{{$labels.instance}} in cluster {{$labels.activemq_cluster}}, which is above
|
||||
the threshold of 70 percent.'
|
||||
summary: Queue destination memory usage is high, which may result in a reduction
|
||||
of the rate at which producers send messages.
|
||||
expr: |
|
||||
sum without (destination) (activemq_queue_memory_percent_usage) > 70
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheActiveMQHighStoreMemoryUsage
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} percent of store memory used on {{$labels.instance}}
|
||||
in cluster {{$labels.activemq_cluster}}, which is above the threshold of 70
|
||||
percent.'
|
||||
summary: Store memory usage is high, which may result in producers being unable to
|
||||
send messages.
|
||||
expr: |
|
||||
activemq_store_usage_ratio > 70
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheActiveMQHighTemporaryMemoryUsage
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} percent of temporary memory used on
|
||||
{{$labels.instance}} in cluster {{$labels.activemq_cluster}}, which is above
|
||||
the threshold of 70 percent.'
|
||||
summary: Temporary memory usage is high, which may result in saturation of messaging
|
||||
throughput.
|
||||
expr: |
|
||||
activemq_temp_usage_ratio > 70
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
|
@ -0,0 +1,786 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [ ],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"apache-activemq-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Apache ActiveMQ dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of clusters that are reporting metrics from ActiveMQ.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "fixed"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "none",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0-60139",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "count (activemq_memory_usage_ratio{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "Clusters",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of broker instances across clusters.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "fixed"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "none",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0-60139",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "count (activemq_memory_usage_ratio{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "Brokers",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of message producers active on destinations across clusters.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "fixed"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "none",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0-60139",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum (activemq_queue_producer_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"}) + sum (activemq_topic_producer_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\",destination!~\"ActiveMQ.Advisory.*\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "Producers",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The number of consumers subscribed to destinations across clusters.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "fixed"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"colorMode": "none",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0-60139",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum (activemq_queue_consumer_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"}) + sum (activemq_topic_consumer_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\",destination!~\"ActiveMQ.Advisory.*\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "__auto"
|
||||
}
|
||||
],
|
||||
"title": "Consumers",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of messages that have been sent to destinations in a cluster.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"fixedColor": "#C8F2C2",
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"axisShow": false,
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 25,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 6
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by (activemq_cluster, job) (increase(activemq_queue_enqueue_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"}[$__interval:])) + sum by (activemq_cluster, job) (increase(activemq_topic_enqueue_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\", destination!~\"ActiveMQ.Advisory.*\"}[$__interval:]))",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{activemq_cluster}}"
|
||||
}
|
||||
],
|
||||
"title": "Enqueue / $__interval",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of messages that have been acknowledged (and removed) from destinations in a cluster.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"axisShow": false,
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 25,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 6
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum by (activemq_cluster, job) (increase(activemq_queue_dequeue_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"}[$__interval:])) + sum by (activemq_cluster, job) (increase(activemq_topic_dequeue_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\", destination!~\"ActiveMQ.Advisory.*\"}[$__interval:]))",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{activemq_cluster}}"
|
||||
}
|
||||
],
|
||||
"title": "Dequeue / $__interval",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Average percentage of temporary memory used across clusters.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"max": 1,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": 50
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 70
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"minVizHeight": 10,
|
||||
"minVizWidth": 0,
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": false,
|
||||
"text": { },
|
||||
"valueMode": "color"
|
||||
},
|
||||
"pluginVersion": "10.2.0-60139",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "avg by (activemq_cluster, job) (activemq_temp_usage_ratio{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{activemq_cluster}}"
|
||||
}
|
||||
],
|
||||
"title": "Average temporary memory usage",
|
||||
"type": "bargauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Average percentage of store memory used across clusters.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"max": 1,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": 50
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 70
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 14
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"minVizHeight": 10,
|
||||
"minVizWidth": 0,
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": false,
|
||||
"valueMode": "color"
|
||||
},
|
||||
"pluginVersion": "10.2.0-60139",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "avg by (activemq_cluster, job) (activemq_store_usage_ratio{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{activemq_cluster}}"
|
||||
}
|
||||
],
|
||||
"title": "Average store memory usage",
|
||||
"type": "bargauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Average percentage of broker memory used across clusters.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"max": 1,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": 50
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 70
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 14
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"minVizHeight": 10,
|
||||
"minVizWidth": 0,
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": false,
|
||||
"valueMode": "color"
|
||||
},
|
||||
"pluginVersion": "10.2.0-60139",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "avg by (activemq_cluster, job) (activemq_memory_usage_ratio{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{activemq_cluster}}"
|
||||
}
|
||||
],
|
||||
"title": "Average broker memory usage",
|
||||
"type": "bargauge"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"apache-activemq-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(activemq_topic_producer_count,job)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "Cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(activemq_memory_usage_ratio{job=~\"$job\", cluster=~\"$cluster\"},cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "ActiveMQ cluster",
|
||||
"multi": true,
|
||||
"name": "activemq_cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(activemq_memory_usage_ratio{job=~\"$job\"},activemq_cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "default",
|
||||
"title": "Apache ActiveMQ cluster overview",
|
||||
"uid": "apache-activemq-cluster-overview",
|
||||
"version": 0
|
||||
}
|
File diff suppressed because it is too large
Load diff
310
assets/apache-activemq/dashboards/apache-activemq-logs.json
Normal file
310
assets/apache-activemq/dashboards/apache-activemq-logs.json
Normal file
|
@ -0,0 +1,310 @@
|
|||
{
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"apache-activemq-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Apache ActiveMQ dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Logs volume grouped by \"level\" label.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 50,
|
||||
"stacking": {
|
||||
"mode": "normal"
|
||||
}
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(E|e)(rr.*|RR.*)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "red",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(W|w)(arn.*|ARN.*|rn|RN)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "orange",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(T|t)(race|RACE)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "light-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "logs"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "text",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 24
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "30s",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "sum by (level) (count_over_time({job=~\"integrations/apache-activemq\",job=~\"$job\",activemq_cluster=~\"$activemq_cluster\",instance=~\"$instance\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__interval]))\n",
|
||||
"legendFormat": "{{ level }}"
|
||||
}
|
||||
],
|
||||
"title": "Logs volume",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "renameByRegex",
|
||||
"options": {
|
||||
"regex": "Value",
|
||||
"renamePattern": "logs"
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 18,
|
||||
"w": 24
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"dedupStrategy": "exact",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": true,
|
||||
"showTime": false,
|
||||
"wrapLogMessage": true
|
||||
},
|
||||
"pluginVersion": "v10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "{job=~\"integrations/apache-activemq\",job=~\"$job\",activemq_cluster=~\"$activemq_cluster\",instance=~\"$instance\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n"
|
||||
}
|
||||
],
|
||||
"title": "Logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"schemaVersion": 36,
|
||||
"tags": [
|
||||
"apache-activemq-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"label": "Loki data source",
|
||||
"name": "loki_datasource",
|
||||
"query": "loki",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"query": "label_values({job=~\"integrations/apache-activemq\"}, job)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Activemq_cluster",
|
||||
"multi": true,
|
||||
"name": "activemq_cluster",
|
||||
"query": "label_values({job=~\"integrations/apache-activemq\",job=~\"$job\"}, activemq_cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"query": "label_values({job=~\"integrations/apache-activemq\",job=~\"$job\",activemq_cluster=~\"$activemq_cluster\"}, instance)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Level",
|
||||
"multi": true,
|
||||
"name": "level",
|
||||
"query": "label_values({job=~\"integrations/apache-activemq\",job=~\"$job\",activemq_cluster=~\"$activemq_cluster\",instance=~\"$instance\"}, level)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"label": "Regex search",
|
||||
"name": "regex_search",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Apache ActiveMQ logs",
|
||||
"uid": "apache-activemq-logs"
|
||||
}
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
1
assets/apache-activemq/rules.yaml
Normal file
1
assets/apache-activemq/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
45
assets/apache-airflow/alerts.yaml
Normal file
45
assets/apache-airflow/alerts.yaml
Normal file
|
@ -0,0 +1,45 @@
|
|||
groups:
|
||||
- name: apache-airflow
|
||||
rules:
|
||||
- alert: ApacheAirflowStarvingPoolTasks
|
||||
annotations:
|
||||
description: |
|
||||
The number of starved tasks is {{ printf "%.0f" $value }} over the last 5m on {{ $labels.instance }} - {{ $labels.pool_name }} which is above the threshold of 0.
|
||||
summary: There are starved tasks detected in the Apache Airflow pool.
|
||||
expr: |
|
||||
airflow_pool_starving_tasks > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheAirflowDAGScheduleDelayWarningLevel
|
||||
annotations:
|
||||
description: |
|
||||
The average delay in DAG schedule to run time is {{ printf "%.0f" $value }} over the last 1m on {{ $labels.instance }} - {{ $labels.dag_id }} which is above the threshold of 10.
|
||||
summary: The delay in DAG schedule time to DAG run time has reached the warning
|
||||
threshold.
|
||||
expr: |
|
||||
increase(airflow_dagrun_schedule_delay_sum[5m]) / clamp_min(increase(airflow_dagrun_schedule_delay_count[5m]),1) > 10
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheAirflowDAGScheduleDelayCriticalLevel
|
||||
annotations:
|
||||
description: |
|
||||
The average delay in DAG schedule to run time is {{ printf "%.0f" $value }} over the last 1m for {{ $labels.instance }} - {{ $labels.dag_id }} which is above the threshold of 60.
|
||||
summary: The delay in DAG schedule time to DAG run time has reached the critical
|
||||
threshold.
|
||||
expr: |
|
||||
increase(airflow_dagrun_schedule_delay_sum[5m]) / clamp_min(increase(airflow_dagrun_schedule_delay_count[5m]),1) > 60
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheAirflowDAGFailures
|
||||
annotations:
|
||||
description: |
|
||||
The number of DAG failures seen is {{ printf "%.0f" $value }} over the last 1m for {{ $labels.instance }} - {{ $labels.dag_id }} which is above the threshold of 0.
|
||||
summary: There have been DAG failures detected.
|
||||
expr: |
|
||||
increase(airflow_dagrun_duration_failed_count[5m]) > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
1434
assets/apache-airflow/dashboards/apache-airflow-overview.json
Normal file
1434
assets/apache-airflow/dashboards/apache-airflow-overview.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/apache-airflow/rules.yaml
Normal file
1
assets/apache-airflow/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
1
assets/apache-camel/alerts.yaml
Normal file
1
assets/apache-camel/alerts.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
2455
assets/apache-camel/dashboards/apache-camel-micrometer.json
Normal file
2455
assets/apache-camel/dashboards/apache-camel-micrometer.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/apache-camel/rules.yaml
Normal file
1
assets/apache-camel/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
91
assets/apache-cassandra/alerts.yaml
Normal file
91
assets/apache-cassandra/alerts.yaml
Normal file
|
@ -0,0 +1,91 @@
|
|||
groups:
|
||||
- name: ApacheCassandraAlerts
|
||||
rules:
|
||||
- alert: HighReadLatency
|
||||
annotations:
|
||||
description: 'An average of {{ printf "%.0f" $value }}ms of read latency has
|
||||
occurred over the last 5 minutes on {{$labels.instance}}, which is above the
|
||||
threshold of 200ms. '
|
||||
summary: There is a high level of read latency within the node.
|
||||
expr: |
|
||||
sum(cassandra_table_readlatency_seconds_sum) by (instance) / sum(cassandra_table_readlatency_seconds_count) by (instance) * 1000 > 200
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: HighWriteLatency
|
||||
annotations:
|
||||
description: 'An average of {{ printf "%.0f" $value }}ms of write latency has
|
||||
occurred over the last 5 minutes on {{$labels.instance}}, which is above the
|
||||
threshold of 200ms. '
|
||||
summary: There is a high level of write latency within the node.
|
||||
expr: |
|
||||
sum(cassandra_keyspace_writelatency_seconds_sum) by (instance) / sum(cassandra_keyspace_writelatency_seconds_count) by (instance) * 1000 > 200
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: HighPendingCompactionTasks
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} compaction tasks have been pending
|
||||
over the last 15 minutes on {{$labels.instance}}, which is above the threshold
|
||||
of 30. '
|
||||
summary: Compaction task queue is filling up.
|
||||
expr: |
|
||||
cassandra_compaction_pendingtasks > 30
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: BlockedCompactionTasksFound
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} compaction tasks have been blocked
|
||||
over the last 5 minutes on {{$labels.instance}}, which is above the threshold
|
||||
of 1. '
|
||||
summary: Compaction task queue is full.
|
||||
expr: |
|
||||
cassandra_threadpools_currentlyblockedtasks_count{threadpools="CompactionExecutor", path="internal"} > 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: HintsStoredOnNode
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} hints have been written to the node
|
||||
over the last minute on {{$labels.instance}}, which is above the threshold
|
||||
of 1. '
|
||||
summary: Hints have been recently written to this node.
|
||||
expr: |
|
||||
increase(cassandra_storage_totalhints_count[5m]) > 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: UnavailableWriteRequestsFound
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} unavailable write requests have been
|
||||
found over the last 5 minutes on {{$labels.instance}}, which is above the
|
||||
threshold of 1. '
|
||||
summary: Unavailable exceptions have been encountered while performing writes
|
||||
in this cluster.
|
||||
expr: |
|
||||
sum(cassandra_clientrequest_unavailables_count{clientrequest="Write"}) by (cassandra_cluster) > 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: HighCpuUsage
|
||||
annotations:
|
||||
description: 'CPU usage is at {{ printf "%.0f" $value }} percent over the last
|
||||
5 minutes on {{$labels.instance}}, which is above the threshold of 80. '
|
||||
summary: A node has a CPU usage higher than the configured threshold.
|
||||
expr: |
|
||||
jvm_process_cpu_load{job=~"integrations/apache-cassandra"} * 100 > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: HighMemoryUsage
|
||||
annotations:
|
||||
description: 'Memory usage is at {{ printf "%.0f" $value }} percent over the
|
||||
last 5 minutes on {{$labels.instance}}, which is above the threshold of 80. '
|
||||
summary: A node has a higher memory utilization than the configured threshold.
|
||||
expr: |
|
||||
sum(jvm_memory_usage_used_bytes{job=~"integrations/apache-cassandra", area="Heap"}) / sum(jvm_physical_memory_size{job=~"integrations/apache-cassandra"}) * 100 > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
1090
assets/apache-cassandra/dashboards/cassandra-keyspaces.json
Normal file
1090
assets/apache-cassandra/dashboards/cassandra-keyspaces.json
Normal file
File diff suppressed because it is too large
Load diff
1700
assets/apache-cassandra/dashboards/cassandra-nodes.json
Normal file
1700
assets/apache-cassandra/dashboards/cassandra-nodes.json
Normal file
File diff suppressed because it is too large
Load diff
2115
assets/apache-cassandra/dashboards/cassandra-overview.json
Normal file
2115
assets/apache-cassandra/dashboards/cassandra-overview.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/apache-cassandra/rules.yaml
Normal file
1
assets/apache-cassandra/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
114
assets/apache-couchdb/alerts.yaml
Normal file
114
assets/apache-couchdb/alerts.yaml
Normal file
|
@ -0,0 +1,114 @@
|
|||
groups:
|
||||
- name: ApacheCouchDBAlerts
|
||||
rules:
|
||||
- alert: CouchDBUnhealthyCluster
|
||||
annotations:
|
||||
description: '{{$labels.couchdb_cluster}} has reported a value of {{ printf
|
||||
"%.0f" $value }} for its stability over the last 5 minutes, which is below
|
||||
the threshold of 1.'
|
||||
summary: At least one of the nodes in a cluster is reporting the cluster as
|
||||
being unstable.
|
||||
expr: |
|
||||
min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable) < 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: CouchDBHigh4xxResponseCodes
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} 4xx responses have been detected over
|
||||
the last 5 minutes on {{$labels.instance}}, which is above the threshold of
|
||||
5.'
|
||||
summary: There is a high number of 4xx responses for incoming requests to a
|
||||
node.
|
||||
expr: |
|
||||
sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.*"}[5m])) > 5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CouchDBHigh5xxResponseCodes
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} 5xx responses have been detected over
|
||||
the last 5 minutes on {{$labels.instance}}, which is above the threshold of
|
||||
0.'
|
||||
summary: There is a high number of 5xx responses for incoming requests to a
|
||||
node.
|
||||
expr: |
|
||||
sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.*"}[5m])) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: CouchDBModerateRequestLatency
|
||||
annotations:
|
||||
description: 'An average of {{ printf "%.0f" $value }}ms of request latency
|
||||
has occurred over the last 5 minutes on {{$labels.instance}}, which is above
|
||||
the threshold of 500ms. '
|
||||
summary: There is a moderate level of request latency for a node.
|
||||
expr: |
|
||||
sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > 500
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CouchDBHighRequestLatency
|
||||
annotations:
|
||||
description: 'An average of {{ printf "%.0f" $value }}ms of request latency
|
||||
has occurred over the last 5 minutes on {{$labels.instance}}, which is above
|
||||
the threshold of 1000ms. '
|
||||
summary: There is a high level of request latency for a node.
|
||||
expr: |
|
||||
sum by(job, instance) (couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > 1000
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: CouchDBManyReplicatorJobsPending
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} replicator jobs are pending on {{$labels.instance}},
|
||||
which is above the threshold of 10. '
|
||||
summary: There is a high number of replicator jobs pending for a node.
|
||||
expr: |
|
||||
sum by(job, instance) (couchdb_couch_replicator_jobs_pending) > 10
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CouchDBReplicatorJobsCrashing
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} replicator jobs have crashed over the
|
||||
last 5 minutes on {{$labels.instance}}, which is above the threshold of 0. '
|
||||
summary: There are replicator jobs crashing for a node.
|
||||
expr: |
|
||||
sum by(job, instance) (increase(couchdb_couch_replicator_jobs_crashes_total[5m])) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: CouchDBReplicatorChangesQueuesDying
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} replicator changes queue processes
|
||||
have died over the last 5 minutes on {{$labels.instance}}, which is above
|
||||
the threshold of 0. '
|
||||
summary: There are replicator changes queue process deaths for a node.
|
||||
expr: |
|
||||
sum by(job, instance) (increase(couchdb_couch_replicator_changes_queue_deaths_total[5m])) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CouchDBReplicatorConnectionOwnersCrashing
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} replicator connection owner processes
|
||||
have crashed over the last 5 minutes on {{$labels.instance}}, which is above
|
||||
the threshold of 0. '
|
||||
summary: There are replicator connection owner process crashes for a node.
|
||||
expr: |
|
||||
sum by(job, instance) (increase(couchdb_couch_replicator_connection_owner_crashes_total[5m])) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CouchDBReplicatorConnectionWorkersCrashing
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} replicator connection worker processes
|
||||
have crashed over the last 5 minutes on {{$labels.instance}}, which is above
|
||||
the threshold of 0. '
|
||||
summary: There are replicator connection worker process crashes for a node.
|
||||
expr: |
|
||||
sum by(job, instance) (increase(couchdb_couch_replicator_connection_worker_crashes_total[5m])) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
1663
assets/apache-couchdb/dashboards/couchdb-nodes.json
Normal file
1663
assets/apache-couchdb/dashboards/couchdb-nodes.json
Normal file
File diff suppressed because it is too large
Load diff
2244
assets/apache-couchdb/dashboards/couchdb-overview.json
Normal file
2244
assets/apache-couchdb/dashboards/couchdb-overview.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/apache-couchdb/rules.yaml
Normal file
1
assets/apache-couchdb/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
90
assets/apache-hadoop/alerts.yaml
Normal file
90
assets/apache-hadoop/alerts.yaml
Normal file
|
@ -0,0 +1,90 @@
|
|||
groups:
|
||||
- name: apache-hadoop
|
||||
rules:
|
||||
- alert: ApacheHadoopLowHDFSCapacity
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} percent of HDFS capacity remaining on {{$labels.hadoop_cluster}}
|
||||
- {{$labels.instance}}, which is below the threshold of 20.'
|
||||
summary: Remaining HDFS cluster capacity is low which may result in DataNode
|
||||
failures or prevent DataNodes from writing data.
|
||||
expr: |
|
||||
min without(job, name) (100 * hadoop_namenode_capacityremaining / clamp_min(hadoop_namenode_capacitytotal, 1)) < 20
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheHadoopHDFSMissingBlocks
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} HDFS missing blocks on {{$labels.hadoop_cluster}}
|
||||
- {{$labels.instance}}, which is above the threshold of 0.'
|
||||
summary: There are missing blocks in the HDFS cluster which may indicate potential
|
||||
data loss.
|
||||
expr: |
|
||||
max without(job, name) (hadoop_namenode_missingblocks) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheHadoopHDFSHighVolumeFailures
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} HDFS volume failures on {{$labels.hadoop_cluster}}
|
||||
- {{$labels.instance}}, which is above the threshold of 0.'
|
||||
summary: A volume failure in the HDFS cluster may indicate hardware failures.
|
||||
expr: |
|
||||
max without(job, name) (hadoop_namenode_volumefailurestotal) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheHadoopHighDeadDataNodes
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} dead DataNodes on {{$labels.hadoop_cluster}}
|
||||
- {{$labels.instance}}, which is above the threshold of 0.'
|
||||
summary: Number of dead DataNodes has increased, which could result in data
|
||||
loss and increased network activity.
|
||||
expr: |
|
||||
max without(job, name) (hadoop_namenode_numdeaddatanodes) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheHadoopHighNodeManagerCPUUsage
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} percent NodeManager CPU usage on {{$labels.hadoop_cluster}}
|
||||
- {{$labels.instance}}, which is above the threshold of 80.'
|
||||
summary: A NodeManager has a CPU usage higher than the configured threshold.
|
||||
expr: |
|
||||
max without(job, name) (100 * hadoop_nodemanager_nodecpuutilization) > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheHadoopHighNodeManagerMemoryUsage
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value}} percent NodeManager memory usage on
|
||||
{{$labels.hadoop_cluster}} - {{$labels.instance}}, which is above the threshold
|
||||
of 80.'
|
||||
summary: A NodeManager has a higher memory utilization than the configured threshold.
|
||||
expr: |
|
||||
max without(job, name) (100 * hadoop_nodemanager_allocatedgb / clamp_min(hadoop_nodemanager_availablegb + hadoop_nodemanager_allocatedgb,1)) > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheHadoopHighResourceManagerVirtualCoreCPUUsage
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} percent virtual core CPU usage on {{$labels.hadoop_cluster}}
|
||||
- {{$labels.instance}}, which is above the threshold of 80.'
|
||||
summary: A ResourceManager has a virtual core CPU usage higher than the configured
|
||||
threshold.
|
||||
expr: |
|
||||
max without(job, name) (100 * hadoop_resourcemanager_allocatedvcores / clamp_min(hadoop_resourcemanager_availablevcores + hadoop_resourcemanager_allocatedvcores,1)) > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheHadoopHighResourceManagerMemoryUsage
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value}} percent ResourceManager memory usage
|
||||
on {{$labels.hadoop_cluster}} - {{$labels.instance}}, which is above the threshold
|
||||
of 80.'
|
||||
summary: A ResourceManager has a higher memory utilization than the configured
|
||||
threshold.
|
||||
expr: |
|
||||
max without(job, name) (100 * hadoop_resourcemanager_allocatedmb / clamp_min(hadoop_resourcemanager_availablemb + hadoop_resourcemanager_allocatedmb,1)) > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
|
@ -0,0 +1,488 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [ ],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"apache-hadoop-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Apache Hadoop dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": ""
|
||||
}
|
||||
],
|
||||
"title": "DataNodes",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Total number of blocks evicted without being read by the Hadoop DataNode.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": ""
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(hadoop_datanode_ramdiskblocksevictedwithoutread{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\"}[$__interval:])",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Unread blocks evicted",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Total number of blocks removed by the Hadoop DataNode.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": ""
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 1
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(hadoop_datanode_blocksremoved{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\"}[$__interval:])",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Blocks removed",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Displays the total number of volume failures encountered by the Hadoop DataNode.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": ""
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 1
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(hadoop_datanode_volumefailures{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\"}[$__interval:])",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Volume failures",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "The DataNode logs.",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": false,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": false
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "{job=~\"$job\", hadoop_cluster=~\"$hadoop_cluster\", instance=~\"$instance\", filename=~\".*/hadoop/logs/.*-datanode.*.log\"} |= ``",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "DataNode logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"apache-hadoop-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Loki Datasource",
|
||||
"name": "loki_datasource",
|
||||
"options": [ ],
|
||||
"query": "loki",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(hadoop_datanode_ramdiskblocksevictedwithoutread,job)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"options": [ ],
|
||||
"query": "label_values(hadoop_datanode_ramdiskblocksevictedwithoutread{job=~\"$job\"}, instance)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Hadoop cluster",
|
||||
"multi": true,
|
||||
"name": "hadoop_cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(hadoop_datanode_ramdiskblocksevictedwithoutread{job=~\"$job\"}, hadoop_cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "default",
|
||||
"title": "Apache Hadoop DataNode overview",
|
||||
"uid": "apache-hadoop-datanode-overview",
|
||||
"version": 0
|
||||
}
|
|
@ -0,0 +1,983 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [ ],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"apache-hadoop-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Apache Hadoop dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The DataNodes current state.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
}
|
||||
},
|
||||
"mappings": [ ]
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"pieType": "pie",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "hadoop_namenode_numlivedatanodes{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}} - live DataNodes"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "hadoop_namenode_numdeaddatanodes{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}} - dead DataNodes"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "hadoop_namenode_numstaledatanodes{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}} - stale DataNodes"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "hadoop_namenode_numdecommissioningdatanodes{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}} - decommissioning DataNodes"
|
||||
}
|
||||
],
|
||||
"title": "DataNode state",
|
||||
"type": "piechart"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The storage utilization of the NameNode.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "100 * hadoop_namenode_capacityused{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"} / clamp_min(hadoop_namenode_capacitytotal{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}, 1)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Capacity utilization",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Total number of blocks managed by the NameNode.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": ""
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 9
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "hadoop_namenode_blockstotal{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Total blocks",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of blocks reported by DataNodes as missing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": ""
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 9
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "hadoop_namenode_missingblocks{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Missing blocks",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of blocks that are under-replicated.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": ""
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 9
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "hadoop_namenode_underreplicatedblocks{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Under-replicated blocks",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of transactions processed by the NameNode since the last checkpoint.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": ""
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 15
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "hadoop_namenode_transactionssincelastcheckpoint{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Transactions since last checkpoint",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The recent increase in number of volume failures on all DataNodes.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": ""
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 15
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(hadoop_namenode_volumefailurestotal{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}[$__interval:])",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Volume failures",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Total number of files managed by the NameNode.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": ""
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "hadoop_namenode_filestotal{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Total files",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Total load on the NameNode.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": ""
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 21
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "hadoop_namenode_totalload{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Total load",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "The NameNode logs.",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 27
|
||||
},
|
||||
"id": 11,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": false,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": false
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "{job=~\"$job\", hadoop_cluster=~\"$hadoop_cluster\", instance=~\"$instance\", filename=~\".*/hadoop/logs/.*-namenode.*.log\"} |= ``",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "NameNode logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"apache-hadoop-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Loki Datasource",
|
||||
"name": "loki_datasource",
|
||||
"options": [ ],
|
||||
"query": "loki",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(hadoop_namenode_blockstotal,job)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"options": [ ],
|
||||
"query": "label_values(hadoop_namenode_blockstotal{job=~\"$job\"}, instance)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Hadoop cluster",
|
||||
"multi": true,
|
||||
"name": "hadoop_cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(hadoop_namenode_blockstotal{job=~\"$job\"}, hadoop_cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "default",
|
||||
"title": "Apache Hadoop NameNode overview",
|
||||
"uid": "apache-hadoop-namenode-overview",
|
||||
"version": 0
|
||||
}
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
1
assets/apache-hadoop/rules.yaml
Normal file
1
assets/apache-hadoop/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
59
assets/apache-hbase/alerts.yaml
Normal file
59
assets/apache-hbase/alerts.yaml
Normal file
|
@ -0,0 +1,59 @@
|
|||
groups:
|
||||
- name: apache-hbase-alerts
|
||||
rules:
|
||||
- alert: HBaseHighHeapMemUsage
|
||||
annotations:
|
||||
description: The heap memory usage for the JVM on instance {{$labels.instance}}
|
||||
in cluster {{$labels.hbase_cluster}} is {{printf "%.0f" $value}} percent,
|
||||
which is above the threshold of 80 percent
|
||||
summary: There is a limited amount of heap memory available to the JVM.
|
||||
expr: |
|
||||
100 * sum without(context, hostname, processname) (jvm_metrics_mem_heap_used_m{job=~"integrations/apache-hbase"} / clamp_min(jvm_metrics_mem_heap_committed_m{job=~"integrations/apache-hbase"}, 1)) > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: HBaseDeadRegionServer
|
||||
annotations:
|
||||
description: '{{$value}} RegionServer(s) in cluster {{$labels.hbase_cluster}}
|
||||
are unresponsive, which is above the threshold of 0. The name(s) of the dead
|
||||
RegionServer(s) are {{$labels.deadregionservers}}'
|
||||
summary: One or more RegionServer(s) have become unresponsive.
|
||||
expr: |
|
||||
server_num_dead_region_servers > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: HBaseOldRegionsInTransition
|
||||
annotations:
|
||||
description: '{{printf "%.0f" $value}} percent of regions in transition
|
||||
in cluster {{$labels.hbase_cluster}} are transitioning for longer than expected,
|
||||
which is above the threshold of 50 percent'
|
||||
summary: Regions are in transition for longer than expected.
|
||||
expr: |
|
||||
100 * assignment_manager_rit_count_over_threshold / clamp_min(assignment_manager_rit_count, 1) > 50
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: HBaseHighMasterAuthFailRate
|
||||
annotations:
|
||||
description: '{{printf "%.0f" $value}} percent of authentication attempts to
|
||||
the master are failing in cluster {{$labels.hbase_cluster}}, which is above
|
||||
the threshold of 35 percent'
|
||||
summary: A high percentage of authentication attempts to the master are failing.
|
||||
expr: |
|
||||
100 * rate(master_authentication_failures[5m]) / (clamp_min(rate(master_authentication_successes[5m]), 1) + clamp_min(rate(master_authentication_failures[5m]), 1)) > 35
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: HBaseHighRSAuthFailRate
|
||||
annotations:
|
||||
description: '{{printf "%.0f" $value}} percent of authentication attempts to
|
||||
the RegionServer {{$labels.instance}} are failing in cluster {{$labels.hbase_cluster}},
|
||||
which is above the threshold of 35 percent'
|
||||
summary: A high percentage of authentication attempts to a RegionServer are
|
||||
failing.
|
||||
expr: |
|
||||
100 * rate(region_server_authentication_failures[5m]) / (clamp_min(rate(region_server_authentication_successes[5m]), 1) + clamp_min(rate(region_server_authentication_failures[5m]), 1)) > 35
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
1336
assets/apache-hbase/dashboards/apache-hbase-cluster-overview.json
Normal file
1336
assets/apache-hbase/dashboards/apache-hbase-cluster-overview.json
Normal file
File diff suppressed because it is too large
Load diff
325
assets/apache-hbase/dashboards/apache-hbase-logs.json
Normal file
325
assets/apache-hbase/dashboards/apache-hbase-logs.json
Normal file
|
@ -0,0 +1,325 @@
|
|||
{
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": false,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"apache-hbase-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Apache HBase Dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Logs volume grouped by \"level\" label.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 50,
|
||||
"stacking": {
|
||||
"mode": "normal"
|
||||
}
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(E|e)(rr.*|RR.*)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "red",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(W|w)(arn.*|ARN.*|rn|RN)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "orange",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(T|t)(race|RACE)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "light-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "logs"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "text",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 24
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "30s",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "sum by (level) (count_over_time({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",logger=~\"$logger\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__interval]))\n",
|
||||
"legendFormat": "{{ level }}"
|
||||
}
|
||||
],
|
||||
"title": "Logs volume",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "renameByRegex",
|
||||
"options": {
|
||||
"regex": "Value",
|
||||
"renamePattern": "logs"
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 18,
|
||||
"w": 24
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"dedupStrategy": "exact",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": true,
|
||||
"showTime": false,
|
||||
"wrapLogMessage": true
|
||||
},
|
||||
"pluginVersion": "v10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "{job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",logger=~\"$logger\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n"
|
||||
}
|
||||
],
|
||||
"title": "Logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"schemaVersion": 36,
|
||||
"tags": [
|
||||
"apache-hbase-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"label": "Loki data source",
|
||||
"name": "loki_datasource",
|
||||
"query": "loki",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"query": "label_values({job=~\"integrations/apache-hbase\"}, job)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Hbase_cluster",
|
||||
"multi": true,
|
||||
"name": "hbase_cluster",
|
||||
"query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\"}, hbase_cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\"}, instance)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Logger",
|
||||
"multi": true,
|
||||
"name": "logger",
|
||||
"query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}, logger)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Level",
|
||||
"multi": true,
|
||||
"name": "level",
|
||||
"query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",logger=~\"$logger\"}, level)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"label": "Regex search",
|
||||
"name": "regex_search",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Apache HBase logs overview",
|
||||
"uid": "apache-hbase-logs-overview"
|
||||
}
|
File diff suppressed because it is too large
Load diff
1
assets/apache-hbase/rules.yaml
Normal file
1
assets/apache-hbase/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
41
assets/apache-http/alerts.yaml
Normal file
41
assets/apache-http/alerts.yaml
Normal file
|
@ -0,0 +1,41 @@
|
|||
groups:
|
||||
- name: apache-http
|
||||
rules:
|
||||
- alert: ApacheDown
|
||||
annotations:
|
||||
description: Apache is down on {{ $labels.instance }}.
|
||||
summary: Apache is down.
|
||||
expr: apache_up == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheRestart
|
||||
annotations:
|
||||
description: Apache has just been restarted on {{ $labels.instance }}.
|
||||
summary: Apache restart.
|
||||
expr: apache_uptime_seconds_total / 60 < 1
|
||||
for: "0"
|
||||
labels:
|
||||
severity: info
|
||||
- alert: ApacheWorkersLoad
|
||||
annotations:
|
||||
description: |
|
||||
Busy Apache workers are approaching the max workers count: more than 80% of workers are busy on {{ $labels.instance }}.
|
||||
The current value is {{ $value }}%.
|
||||
summary: Apache workers load is too high.
|
||||
expr: |
|
||||
(sum by (instance) (apache_workers{state="busy"}) / sum by (instance) (apache_scoreboard) ) * 100 > 80
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheResponseTimeTooHigh
|
||||
annotations:
|
||||
description: |
|
||||
Apache average response time is above the threshold of 5000 ms on {{ $labels.instance }}.
|
||||
The current value is {{ $value }} ms.
|
||||
summary: Apache response time is too high.
|
||||
expr: |
|
||||
increase(apache_duration_ms_total[5m])/increase(apache_accesses_total[5m]) > 5000
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
872
assets/apache-http/dashboards/apache-http.json
Normal file
872
assets/apache-http/dashboards/apache-http.json
Normal file
|
@ -0,0 +1,872 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [ ],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 1,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"apache-http-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Apache HTTP dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 1,
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"text": "N/A"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"maxDataPoints": 100,
|
||||
"options": {
|
||||
"colorMode": "none",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "apache_uptime_seconds_total{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"step": 240
|
||||
}
|
||||
],
|
||||
"title": "Uptime",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 1,
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"text": "N/A"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 4,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"maxDataPoints": 100,
|
||||
"options": {
|
||||
"colorMode": "none",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"text": {
|
||||
"titleSize": 2
|
||||
},
|
||||
"textMode": "name"
|
||||
},
|
||||
"pluginVersion": "8.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": false,
|
||||
"expr": "apache_info{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{ version }}",
|
||||
"step": 240
|
||||
}
|
||||
],
|
||||
"title": "Version",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "continuous-GrYlRd"
|
||||
},
|
||||
"custom": {
|
||||
"fillOpacity": 70,
|
||||
"lineWidth": 0,
|
||||
"spanNulls": false
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"color": "red",
|
||||
"index": 1,
|
||||
"text": "Down"
|
||||
},
|
||||
"1": {
|
||||
"color": "green",
|
||||
"index": 0,
|
||||
"text": "Up"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 16,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"alignValue": "left",
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "right"
|
||||
},
|
||||
"mergeValues": false,
|
||||
"rowHeight": 0.90000000000000002,
|
||||
"showValue": "never",
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "apache_up{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Apache up",
|
||||
"refId": "A",
|
||||
"step": 240
|
||||
}
|
||||
],
|
||||
"title": "Apache Up / Down",
|
||||
"type": "state-timeline"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Bytes sent"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.axisPlacement",
|
||||
"value": "right"
|
||||
},
|
||||
{
|
||||
"id": "custom.drawStyle",
|
||||
"value": "bars"
|
||||
},
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "Bps"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max",
|
||||
"min"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "rate(apache_accesses_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Calls",
|
||||
"refId": "A",
|
||||
"step": 240
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "rate(apache_sent_kilobytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) * 1000",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "Bytes sent",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Load",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 1
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max",
|
||||
"min"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"exemplar": false,
|
||||
"expr": "increase(apache_duration_ms_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])/increase(apache_accesses_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Average response time",
|
||||
"refId": "A",
|
||||
"step": 240
|
||||
}
|
||||
],
|
||||
"title": "Response time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "stepAfter",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 2
|
||||
},
|
||||
"id": 7,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max",
|
||||
"min"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"sortBy": "Last *",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "apache_scoreboard{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{ state }}",
|
||||
"refId": "A",
|
||||
"step": 240
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Apache scoreboard statuses",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "stepAfter",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3
|
||||
},
|
||||
"id": 8,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max",
|
||||
"min"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "apache_workers{job=~\"$job\", instance=~\"$instance\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{ state }}",
|
||||
"step": 240
|
||||
}
|
||||
],
|
||||
"title": "Apache worker statuses",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3
|
||||
},
|
||||
"id": 9,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max",
|
||||
"min"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.4.5",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "apache_cpuload{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Load",
|
||||
"refId": "A",
|
||||
"step": 240
|
||||
}
|
||||
],
|
||||
"title": "Apache CPU load",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"apache-http-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "prometheus_datasource",
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$prometheus_datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(apache_up, job)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$prometheus_datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "instance",
|
||||
"multi": false,
|
||||
"name": "instance",
|
||||
"options": [ ],
|
||||
"query": "label_values(apache_up{job=~\"$job\"}, instance)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "default",
|
||||
"title": "Apache HTTP server",
|
||||
"uid": "apache-http",
|
||||
"version": 0
|
||||
}
|
1
assets/apache-http/rules.yaml
Normal file
1
assets/apache-http/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
52
assets/apache-mesos/alerts.yaml
Normal file
52
assets/apache-mesos/alerts.yaml
Normal file
|
@ -0,0 +1,52 @@
|
|||
groups:
|
||||
- name: apache-mesos
|
||||
rules:
|
||||
- alert: ApacheMesosHighMemoryUsage
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} percent memory usage on {{$labels.mesos_cluster}},
|
||||
which is above the threshold of 90.'
|
||||
summary: There is a high memory usage for the cluster.
|
||||
expr: |
|
||||
min without(instance, job, type) (mesos_master_mem{type="percent"}) > 90
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheMesosHighDiskUsage
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} percent disk usage on {{$labels.mesos_cluster}},
|
||||
which is above the threshold of 90.'
|
||||
summary: There is a high disk usage for the cluster.
|
||||
expr: |
|
||||
min without(instance, job, type) (mesos_master_disk{type="percent"}) > 90
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheMesosUnreachableTasks
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} unreachable tasks on {{$labels.mesos_cluster}},
|
||||
which is above the threshold of 3.'
|
||||
summary: There are an unusually high number of unreachable tasks.
|
||||
expr: |
|
||||
max without(instance, job, state) (mesos_master_task_states_current{state="unreachable"}) > 3
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheMesosNoLeaderElected
|
||||
annotations:
|
||||
description: There is no cluster coordinator on {{$labels.mesos_cluster}}.
|
||||
summary: There is currently no cluster coordinator.
|
||||
expr: |
|
||||
max without(instance, job) (mesos_master_elected) == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheMesosInactiveAgents
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} inactive agent clients over the last
|
||||
5m which is above the threshold of 1.'
|
||||
summary: There are currently inactive agent clients.
|
||||
expr: |
|
||||
max without(instance, job, state) (mesos_master_slaves_state{state=~"connected_inactive|disconnected_inactive"}) > 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
1653
assets/apache-mesos/dashboards/apache-mesos-overview.json
Normal file
1653
assets/apache-mesos/dashboards/apache-mesos-overview.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/apache-mesos/rules.yaml
Normal file
1
assets/apache-mesos/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
93
assets/apache-solr/alerts.yaml
Normal file
93
assets/apache-solr/alerts.yaml
Normal file
|
@ -0,0 +1,93 @@
|
|||
groups:
|
||||
- name: apache-solr
|
||||
rules:
|
||||
- alert: ApacheSolrZookeeperChangeInEnsembleSize
|
||||
annotations:
|
||||
description: Zookeeper host {{$labels.zk_host}} has had an ensemble change of
|
||||
{{ printf "%.0f" $value }} over the last 5 minutes
|
||||
summary: Changes in the ZooKeeper ensemble size can affect the stability and
|
||||
performance of the cluster.
|
||||
expr: |
|
||||
changes(solr_zookeeper_ensemble_size[5m]) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheSolrHighCPUUsageCritical
|
||||
annotations:
|
||||
description: '{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had
|
||||
a system CPU load of {{ printf "%.0f" $value }}%, which is above the threshold
|
||||
of 85.'
|
||||
summary: High CPU load can indicate that Solr nodes are under heavy load, potentially
|
||||
impacting performance.
|
||||
expr: |
|
||||
100 * sum without (base_url, item) (avg_over_time(solr_metrics_jvm_os_cpu_load{item="systemCpuLoad"}[5m])) > 85
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheSolrHighCPUUsageWarning
|
||||
annotations:
|
||||
description: '{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had
|
||||
a system CPU load of {{ printf "%.0f" $value }}%, which is above the threshold
|
||||
of 75.'
|
||||
summary: High CPU load can indicate that Solr nodes are under heavy load, potentially
|
||||
impacting performance.
|
||||
expr: |
|
||||
100 * sum without (base_url, item) (avg_over_time(solr_metrics_jvm_os_cpu_load{item="systemCpuLoad"}[5m])) > 75
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheSolrHighHeapMemoryUsageCritical
|
||||
annotations:
|
||||
description: |
|
||||
{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had high memory usage of {{ printf "%.0f" $value }}%, which is above the threshold of 75.
|
||||
summary: High heap memory usage can lead to garbage collection issues, out-of-memory
|
||||
errors, and overall system instability.
|
||||
expr: |
|
||||
100 * sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="used"}) / clamp_min(sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="max"}), 1) > 75
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheSolrHighHeapMemoryUsageWarning
|
||||
annotations:
|
||||
description: |
|
||||
{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had high memory usage of {{ printf "%.0f" $value }}%, which is above the threshold of 85.
|
||||
summary: High heap memory usage can lead to garbage collection issues, out-of-memory
|
||||
errors, and overall system instability.
|
||||
expr: |
|
||||
100 * sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="used"}) / clamp_min(sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="max"}), 1) > 85
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheSolrLowCacheHitRatio
|
||||
annotations:
|
||||
description: |
|
||||
{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had a low cache hit ratio of {{ printf "%.0f" $value }}% on core {{$labels.core}} of type {{$labels.type}}, which is under the threshold of 75.
|
||||
summary: Low cache hit ratios can lead to increased disk I/O and slower query
|
||||
response times.
|
||||
expr: |
|
||||
100 * sum without(base_url, category, collection, item, replica, shard) (solr_metrics_core_searcher_cache_ratio{item="hitratio", type=~"documentCache|filterCache|queryResultCache"}) < 75
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheSolrHighCoreErrors
|
||||
annotations:
|
||||
description: |
|
||||
{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had a high amount of core errors {{ printf "%.0f" $value }}% on core {{$labels.core}}, which is above the threshold of 15.
|
||||
summary: A spike in core errors can indicate serious issues at the core level,
|
||||
affecting data integrity and availability.
|
||||
expr: |
|
||||
100 * sum without(base_url, category, collection, handler, replica, shard) (increase(solr_metrics_core_errors_total[10m]) / clamp_min(avg_over_time(solr_metrics_core_errors_total[10m]), 1)) > 15
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ApacheSolrHighDocumentIndexing
|
||||
annotations:
|
||||
description: |
|
||||
{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had a high document indexing value of {{ printf "%.0f" $value }}% on core {{$labels.core}}, which is above the threshold of 30.
|
||||
summary: A sudden spike in document indexing could indicate unintended or malicious
|
||||
bulk updates.
|
||||
expr: |
|
||||
100 * sum without(base_url, category, collection, handler, replica, shard) (increase(solr_metrics_core_update_handler_adds_total[15m]) / clamp_min(avg_over_time(solr_metrics_core_update_handler_adds_total[15m]), 1)) > 30
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
1963
assets/apache-solr/dashboards/apache-solr-cluster-overview.json
Normal file
1963
assets/apache-solr/dashboards/apache-solr-cluster-overview.json
Normal file
File diff suppressed because it is too large
Load diff
325
assets/apache-solr/dashboards/apache-solr-logs-overview.json
Normal file
325
assets/apache-solr/dashboards/apache-solr-logs-overview.json
Normal file
|
@ -0,0 +1,325 @@
|
|||
{
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"apache-solr-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Apache Solr dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Logs volume grouped by \"level\" label.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 50,
|
||||
"stacking": {
|
||||
"mode": "normal"
|
||||
}
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(E|e)(rr.*|RR.*)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "red",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(W|w)(arn.*|ARN.*|rn|RN)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "orange",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(T|t)(race|RACE)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "light-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "logs"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "text",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 24
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "30s",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "sum by (level) (count_over_time({job=~\"integrations/apache-solr\",job=~\"$job\",solr_cluster=~\"$solr_cluster\",instance=~\"$instance\",level=~\"$level\",filename=~\"$filename\"}\n|~ \"$regex_search\"\n\n[$__interval]))\n",
|
||||
"legendFormat": "{{ level }}"
|
||||
}
|
||||
],
|
||||
"title": "Logs volume",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "renameByRegex",
|
||||
"options": {
|
||||
"regex": "Value",
|
||||
"renamePattern": "logs"
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 18,
|
||||
"w": 24
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"dedupStrategy": "exact",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": true,
|
||||
"showTime": false,
|
||||
"wrapLogMessage": true
|
||||
},
|
||||
"pluginVersion": "v10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "{job=~\"integrations/apache-solr\",job=~\"$job\",solr_cluster=~\"$solr_cluster\",instance=~\"$instance\",level=~\"$level\",filename=~\"$filename\"} \n|~ \"$regex_search\"\n\n\n"
|
||||
}
|
||||
],
|
||||
"title": "Logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"schemaVersion": 36,
|
||||
"tags": [
|
||||
"apache-solr-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"label": "Loki data source",
|
||||
"name": "loki_datasource",
|
||||
"query": "loki",
|
||||
"regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"query": "label_values({job=~\"integrations/apache-solr\"}, job)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Solr_cluster",
|
||||
"multi": true,
|
||||
"name": "solr_cluster",
|
||||
"query": "label_values({job=~\"integrations/apache-solr\",job=~\"$job\"}, solr_cluster)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"query": "label_values({job=~\"integrations/apache-solr\",job=~\"$job\",solr_cluster=~\"$solr_cluster\"}, instance)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Level",
|
||||
"multi": true,
|
||||
"name": "level",
|
||||
"query": "label_values({job=~\"integrations/apache-solr\",job=~\"$job\",solr_cluster=~\"$solr_cluster\",instance=~\"$instance\"}, level)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Filename",
|
||||
"multi": true,
|
||||
"name": "filename",
|
||||
"query": "label_values({job=~\"integrations/apache-solr\",job=~\"$job\",solr_cluster=~\"$solr_cluster\",instance=~\"$instance\",level=~\"$level\"}, filename)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"label": "Regex search",
|
||||
"name": "regex_search",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Apache Solr logs",
|
||||
"uid": "apache-solr-logs-overview"
|
||||
}
|
1943
assets/apache-solr/dashboards/apache-solr-query-performance.json
Normal file
1943
assets/apache-solr/dashboards/apache-solr-query-performance.json
Normal file
File diff suppressed because it is too large
Load diff
1395
assets/apache-solr/dashboards/apache-solr-resource-monitoring.json
Normal file
1395
assets/apache-solr/dashboards/apache-solr-resource-monitoring.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/apache-solr/rules.yaml
Normal file
1
assets/apache-solr/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
47
assets/apache-tomcat/alerts.yaml
Normal file
47
assets/apache-tomcat/alerts.yaml
Normal file
|
@ -0,0 +1,47 @@
|
|||
groups:
|
||||
- name: ApacheTomcatAlerts
|
||||
rules:
|
||||
- alert: ApacheTomcatAlertsHighCpuUsage
|
||||
annotations:
|
||||
description: The CPU usage has been at {{ printf "%.0f" $value }} percent over
|
||||
the last 5 minutes on {{$labels.instance}}, which is above the threshold of
|
||||
80 percent.
|
||||
summary: The instance has a CPU usage higher than the configured threshold.
|
||||
expr: |
|
||||
sum by (job, instance) (jvm_process_cpu_load{job=~"integrations/tomcat"}) > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheTomcatAlertsHighMemoryUsage
|
||||
annotations:
|
||||
description: The memory usage has been at {{ printf "%.0f" $value }} percent
|
||||
over the last 5 minutes on {{$labels.instance}}, which is above the threshold
|
||||
of 80 percent.
|
||||
summary: The instance has a higher memory usage than the configured threshold.
|
||||
expr: |
|
||||
sum(jvm_memory_usage_used_bytes{job=~"integrations/tomcat"}) by (job, instance) / sum(jvm_physical_memory_bytes{job=~"integrations/tomcat"}) by (job, instance) * 100 > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheTomcatAlertsHighRequestErrorPercent
|
||||
annotations:
|
||||
description: The percentage of request errors has been at {{ printf "%.0f" $value
|
||||
}} percent over the last 5 minutes on {{$labels.instance}}, which is above
|
||||
the threshold of 5 percent.
|
||||
summary: There are a high number of request errors.
|
||||
expr: |
|
||||
sum by (job, instance) (increase(tomcat_errorcount_total[5m]) / increase(tomcat_requestcount_total[5m]) * 100) > 5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ApacheTomcatAlertsModeratelyHighProcessingTime
|
||||
annotations:
|
||||
description: The processing time has been at {{ printf "%.0f" $value }}ms over
|
||||
the last 5 minutes on {{$labels.instance}}, which is above the threshold of
|
||||
300ms.
|
||||
summary: The processing time has been moderately high.
|
||||
expr: |
|
||||
sum by (job, instance) (increase(tomcat_processingtime_total[5m]) / increase(tomcat_requestcount_total[5m])) > 300
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
677
assets/apache-tomcat/dashboards/apache-tomcat-hosts.json
Normal file
677
assets/apache-tomcat/dashboards/apache-tomcat-hosts.json
Normal file
|
@ -0,0 +1,677 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [ ],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"apache-tomcat-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Apache Tomcat dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The number of different types of sessions created for a Tomcat host",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(increase(tomcat_session_sessioncounter_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total sessions"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(increase(tomcat_session_rejectedsessions_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - rejected"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(increase(tomcat_session_expiredsessions_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - expired"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(tomcat_session_sessioncounter_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{host}}{{context}} - sessions"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(tomcat_session_rejectedsessions_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{host}}{{context}} - rejected"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(tomcat_session_expiredsessions_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{host}}{{context}} - expired"
|
||||
}
|
||||
],
|
||||
"title": "Sessions",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The average time taken to process recent sessions for a Tomcat host",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(increase(tomcat_session_processingtime_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_session_sessioncounter_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval), 1)) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(tomcat_session_processingtime_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_session_sessioncounter_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval), 1)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{host}}{{context}}"
|
||||
}
|
||||
],
|
||||
"title": "Session processing time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 4,
|
||||
"targets": [ ],
|
||||
"title": "Servlet",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The total requests and errors for a Tomcat servlet",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "r/s"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(rate(tomcat_servlet_requestcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__rate_interval])) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total requests"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(rate(tomcat_servlet_errorcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__rate_interval])) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total errors"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "rate(tomcat_servlet_requestcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{module}}{{servlet}} - requests"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "rate(tomcat_servlet_errorcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{module}}{{servlet}} - errors"
|
||||
}
|
||||
],
|
||||
"title": "Servlet requests",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The average time taken to process recent requests in a Tomcat servlet",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 11
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(increase(tomcat_servlet_processingtime_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_servlet_requestcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__interval:] offset -$__interval), 1)) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(tomcat_servlet_processingtime_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_servlet_requestcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__interval:] offset -$__interval), 1)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{module}}{{servlet}}"
|
||||
}
|
||||
],
|
||||
"title": "Servlet processing time",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"apache-tomcat-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Job",
|
||||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(tomcat_bytesreceived_total, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Instance",
|
||||
"multi": false,
|
||||
"name": "instance",
|
||||
"options": [ ],
|
||||
"query": "label_values(tomcat_bytesreceived_total, instance)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Host",
|
||||
"multi": true,
|
||||
"name": "host",
|
||||
"options": [ ],
|
||||
"query": "label_values(tomcat_session_sessioncounter_total{instance=~\"$instance\"}, host)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Context",
|
||||
"multi": true,
|
||||
"name": "context",
|
||||
"options": [ ],
|
||||
"query": "label_values(tomcat_session_sessioncounter_total{host=~\"$host\"}, context)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Servlet",
|
||||
"multi": true,
|
||||
"name": "servlet",
|
||||
"options": [ ],
|
||||
"query": "label_values(tomcat_servlet_requestcount_total{module=~\"$host$context\"}, servlet)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "default",
|
||||
"title": "Apache Tomcat hosts",
|
||||
"uid": "apache-tomcat-hosts",
|
||||
"version": 0
|
||||
}
|
979
assets/apache-tomcat/dashboards/apache-tomcat-overview.json
Normal file
979
assets/apache-tomcat/dashboards/apache-tomcat-overview.json
Normal file
|
@ -0,0 +1,979 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [ ],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"apache-tomcat-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other Apache Tomcat dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The memory usage of the JVM of the instance",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "jvm_memory_usage_used_bytes{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{area}}"
|
||||
}
|
||||
],
|
||||
"title": "Memory usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The CPU usage of the JVM process",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "line"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "jvm_process_cpu_load{job=~\"$job\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "CPU usage",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The sent traffic for a Tomcat connector",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 6
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(rate(tomcat_bytessent_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "rate(tomcat_bytessent_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{protocol}}-{{port}}"
|
||||
}
|
||||
],
|
||||
"title": "Traffic sent",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The received traffic for a Tomcat connector",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 6
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(rate(tomcat_bytesreceived_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "rate(tomcat_bytesreceived_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{protocol}}-{{port}}"
|
||||
}
|
||||
],
|
||||
"title": "Traffic received",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The total requests and errors for a Tomcat connector",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "r/s"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 12
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(rate(tomcat_requestcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total requests"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(rate(tomcat_errorcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total errors"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "rate(tomcat_requestcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - requests"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "rate(tomcat_errorcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - errors"
|
||||
}
|
||||
],
|
||||
"title": "Requests",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The average time taken to process recent requests for a Tomcat connector",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"axisSoftMin": 0,
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "line"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 300
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 12
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(increase(tomcat_processingtime_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_requestcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__interval:] offset -$__interval), 1)) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "increase(tomcat_processingtime_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_requestcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__interval:] offset -$__interval), 1)",
|
||||
"format": "time_series",
|
||||
"interval": "1m",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{protocol}}-{{port}}"
|
||||
}
|
||||
],
|
||||
"title": "Processing time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "The number of various threads being used by a Tomcat connector",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(tomcat_threadpool_connectioncount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - total connections"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(tomcat_threadpool_pollerthreadcount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - poller total"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(tomcat_threadpool_keepalivecount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - idle total"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "sum(tomcat_threadpool_currentthreadcount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}) by (job, instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - active total"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "tomcat_threadpool_connectioncount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - connections"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "tomcat_threadpool_pollerthreadcount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - poller"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "tomcat_threadpool_keepalivecount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - idle"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"expr": "tomcat_threadpool_currentthreadcount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - active"
|
||||
}
|
||||
],
|
||||
"title": "Threads",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Recent logs from the Catalina.out logs file\n",
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": false,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": false
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "{filename=~\"/var/log/tomcat.*/catalina.out|/opt/tomcat/logs/catalina.out|/Program Files/Apache Software Foundation/Tomcat .*..*/logs/catalina.out\",job=~\"$job\", instance=~\"$instance\"} |= ``",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"apache-tomcat-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Data Source",
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Loki Datasource",
|
||||
"name": "loki_datasource",
|
||||
"options": [ ],
|
||||
"query": "loki",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Job",
|
||||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(tomcat_bytesreceived_total, job)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Instance",
|
||||
"multi": false,
|
||||
"name": "instance",
|
||||
"options": [ ],
|
||||
"query": "label_values(tomcat_bytesreceived_total, instance)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Protocol",
|
||||
"multi": true,
|
||||
"name": "protocol",
|
||||
"options": [ ],
|
||||
"query": "label_values(tomcat_bytesreceived_total, protocol)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Port",
|
||||
"multi": true,
|
||||
"name": "port",
|
||||
"options": [ ],
|
||||
"query": "label_values(tomcat_bytesreceived_total, port)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "default",
|
||||
"title": "Apache Tomcat overview",
|
||||
"uid": "apache-tomcat-overview",
|
||||
"version": 0
|
||||
}
|
1
assets/apache-tomcat/rules.yaml
Normal file
1
assets/apache-tomcat/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
30
assets/argocd/alerts.yaml
Normal file
30
assets/argocd/alerts.yaml
Normal file
|
@ -0,0 +1,30 @@
|
|||
groups:
|
||||
- name: ArgoCD
|
||||
rules:
|
||||
- alert: ArgoAppOutOfSync
|
||||
annotations:
|
||||
description: Application {{ $labels.name }} has sync status as {{ $labels.sync_status
|
||||
}}.
|
||||
summary: Application is OutOfSync.
|
||||
expr: argocd_app_info{sync_status="OutOfSync"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ArgoAppSyncFailed
|
||||
annotations:
|
||||
description: Application {{ $labels.name }} has sync phase as {{ $labels.phase
|
||||
}}.
|
||||
summary: Application Sync Failed.
|
||||
expr: argocd_app_sync_total{phase!="Succeeded"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ArgoAppMissing
|
||||
annotations:
|
||||
description: "ArgoCD has not reported any applications data for the past 15
|
||||
minutes which means that it must be down or not functioning properly. \n"
|
||||
summary: No reported applications in ArgoCD.
|
||||
expr: absent(argocd_app_info)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
2917
assets/argocd/dashboards/argocd-overview.json
Normal file
2917
assets/argocd/dashboards/argocd-overview.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/argocd/rules.yaml
Normal file
1
assets/argocd/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
47
assets/asterisk/alerts.yaml
Normal file
47
assets/asterisk/alerts.yaml
Normal file
|
@ -0,0 +1,47 @@
|
|||
groups:
|
||||
- name: AsteriskAlerts
|
||||
rules:
|
||||
- alert: AsteriskRestarted
|
||||
annotations:
|
||||
description: |-
|
||||
Asterisk instance restarted in the last minute
|
||||
VALUE = {{ $value }}
|
||||
LABELS = {{ $labels }}
|
||||
summary: Asterisk instance restarted in the last minute.
|
||||
expr: asterisk_core_uptime_seconds < 60
|
||||
for: 5s
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AsteriskReloaded
|
||||
annotations:
|
||||
description: |-
|
||||
Asterisk instance reloaded in the last minute
|
||||
VALUE = {{ $value }}
|
||||
LABELS = {{ $labels }}
|
||||
summary: Asterisk instance reloaded in the last minute.
|
||||
expr: asterisk_core_last_reload_seconds < 60
|
||||
for: 5s
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: AsteriskHighScrapeTime
|
||||
annotations:
|
||||
description: |-
|
||||
Asterisk instance core high scrape time (Possible system performance degradation)
|
||||
VALUE = {{ $value }}
|
||||
LABELS = {{ $labels }}
|
||||
summary: Asterisk instance core high scrape time.
|
||||
expr: asterisk_core_scrape_time_ms > 100
|
||||
for: 10s
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AsteriskHighActiveCallsCount
|
||||
annotations:
|
||||
description: |-
|
||||
Asterisk high active call count
|
||||
VALUE = {{ $value }}
|
||||
LABELS = {{ $labels }}
|
||||
summary: Asterisk high active call count.
|
||||
expr: asterisk_calls_count > 100
|
||||
for: 10s
|
||||
labels:
|
||||
severity: warning
|
778
assets/asterisk/dashboards/asterisk-logs.json
Normal file
778
assets/asterisk/dashboards/asterisk-logs.json
Normal file
|
@ -0,0 +1,778 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": 2,
|
||||
"iteration": 1645648005559,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": false,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"asterisk-integration"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Asterisk Dashboards",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 2
|
||||
},
|
||||
"id": 8,
|
||||
"panels": [ ],
|
||||
"title": "Logs Overview",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 3
|
||||
},
|
||||
"id": 16,
|
||||
"options": {
|
||||
"content": "<img style=\"margin: 20px;\" src=\"https://storage.googleapis.com/grafanalabs-integration-logos/asterisk.png\">",
|
||||
"mode": "html"
|
||||
},
|
||||
"pluginVersion": "8.4.0",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Total number of log lines",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"fixedColor": "super-light-blue",
|
||||
"mode": "fixed"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 3,
|
||||
"x": 4,
|
||||
"y": 3
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"}[$__interval])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Log Lines",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Total number of log lines with the type \"WARNING\"",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"fixedColor": "orange",
|
||||
"mode": "fixed"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 3,
|
||||
"x": 7,
|
||||
"y": 3
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"} |= \"WARNING\" [$__interval])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Warnings",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Total number of log lines with the type \"ERROR\"",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"fixedColor": "red",
|
||||
"mode": "fixed"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 3,
|
||||
"x": 10,
|
||||
"y": 3
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"} |= \"ERROR\" [$__interval])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Errors",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Total number of log lines with the type \"ERROR\"",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"fixedColor": "red",
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "super-light-red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "light-red",
|
||||
"value": 25
|
||||
},
|
||||
{
|
||||
"color": "dark-red",
|
||||
"value": 50
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 13,
|
||||
"y": 3
|
||||
},
|
||||
"id": 13,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "( count_over_time({job=\"$job\", filename=\"$filename\"} |= \"ERROR\" [$__interval]) / count_over_time({job=\"$job\", filename=\"$filename\"} [$__interval]) )",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Error Percentage",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Total number of bytes used by the log file in the time period",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"fixedColor": "purple",
|
||||
"mode": "fixed"
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "super-light-red",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 17,
|
||||
"y": 3
|
||||
},
|
||||
"id": 14,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"sum"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "bytes_over_time({job=\"$job\", filename=\"$filename\"} [$__interval])",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Bytes Used",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "fixed"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"axisSoftMin": 0,
|
||||
"fillOpacity": 50,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineWidth": 1,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byFrameRefID",
|
||||
"options": "A"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Lines"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "super-light-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byFrameRefID",
|
||||
"options": "B"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Warnings"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "orange",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byFrameRefID",
|
||||
"options": "C"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "displayName",
|
||||
"value": "Errors"
|
||||
},
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "red",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 12,
|
||||
"interval": "10s",
|
||||
"options": {
|
||||
"barRadius": 0.25,
|
||||
"barWidth": 0.69999999999999996,
|
||||
"groupWidth": 0.5,
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"orientation": "auto",
|
||||
"showValue": "never",
|
||||
"stacking": "none",
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
},
|
||||
"xTickLabelRotation": 0,
|
||||
"xTickLabelSpacing": 100
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"}[$__interval])",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"} |= \"WARNING\" [$__interval])",
|
||||
"hide": false,
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"} |= \"ERROR\" [$__interval])",
|
||||
"hide": false,
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Historical Logs / Warnings / Errors",
|
||||
"type": "barchart"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 20,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Errors from the log file",
|
||||
"gridPos": {
|
||||
"h": 12,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 13
|
||||
},
|
||||
"id": 21,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": false,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": true
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "{job=~\"$job\", filename=~\"$filename\"} |= \"ERROR\"",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Errors",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"title": "Errors",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 15
|
||||
},
|
||||
"id": 18,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Warnings from the log file",
|
||||
"gridPos": {
|
||||
"h": 12,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 22,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": false,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": true
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "{job=~\"$job\", filename=~\"$filename\"} |= \"WARNING\"",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Warnings",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"title": "Warnings",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapsed": true,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 10,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "*Asterisk Full Log File* The \"full\" log is the most detailed, describing each call in great detail.",
|
||||
"gridPos": {
|
||||
"h": 12,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": false,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": true
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "{job=~\"$job\", filename=~\"$filename\"}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Full Log File",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"title": "Complete Log File",
|
||||
"type": "row"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 35,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"asterisk-integration"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "Loki",
|
||||
"value": "Loki"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Data Source",
|
||||
"multi": false,
|
||||
"name": "loki_datasource",
|
||||
"options": [ ],
|
||||
"query": "loki",
|
||||
"refresh": 1,
|
||||
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "integrations/asterisk-logs",
|
||||
"value": "integrations/asterisk-logs"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"definition": "label_values(job)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": false,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(job)",
|
||||
"refresh": 1,
|
||||
"regex": "^.*asterisk.*",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "/var/log/asterisk/full",
|
||||
"value": "/var/log/asterisk/full"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"definition": "label_values(filename)",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "File Name",
|
||||
"multi": false,
|
||||
"name": "filename",
|
||||
"options": [ ],
|
||||
"query": "label_values(filename)",
|
||||
"refresh": 1,
|
||||
"regex": ".*asterisk.+",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": { },
|
||||
"timezone": "",
|
||||
"title": "Asterisk - Logs",
|
||||
"uid": "integration_asterisk_logs",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
2403
assets/asterisk/dashboards/asterisk-overview.json
Normal file
2403
assets/asterisk/dashboards/asterisk-overview.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/asterisk/rules.yaml
Normal file
1
assets/asterisk/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
1
assets/awx/alerts.yaml
Normal file
1
assets/awx/alerts.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
1859
assets/awx/dashboards/awx.json
Normal file
1859
assets/awx/dashboards/awx.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/awx/rules.yaml
Normal file
1
assets/awx/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
37
assets/blackbox_exporter/alerts.yaml
Normal file
37
assets/blackbox_exporter/alerts.yaml
Normal file
|
@ -0,0 +1,37 @@
|
|||
groups:
|
||||
- name: blackbox-exporter.rules
|
||||
rules:
|
||||
- alert: BlackboxProbeFailed
|
||||
annotations:
|
||||
dashboard_url: https://grafana.com/d/blackbox-exporter-j4da/blackbox-exporter?instance={{
|
||||
$labels.instance }}
|
||||
description: The probe failed for the instance {{ $labels.instance }}.
|
||||
summary: Probe has failed for the past 1m interval.
|
||||
expr: |
|
||||
probe_success{job="blackbox-exporter"} == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: BlackboxLowUptime30d
|
||||
annotations:
|
||||
dashboard_url: https://grafana.com/d/blackbox-exporter-j4da/blackbox-exporter?instance={{
|
||||
$labels.instance }}
|
||||
description: The probe has a lower uptime than 99.9% over the last 30 days for the
|
||||
instance {{ $labels.instance }}.
|
||||
summary: Probe uptime is lower than 99.9% for the last 30 days.
|
||||
expr: |
|
||||
avg_over_time(probe_success{job="blackbox-exporter"}[30d]) * 100 < 99.900000000000006
|
||||
labels:
|
||||
severity: info
|
||||
- alert: BlackboxSslCertificateWillExpireSoon
|
||||
annotations:
|
||||
dashboard_url: https://grafana.com/d/blackbox-exporter-j4da/blackbox-exporter?instance={{
|
||||
$labels.instance }}
|
||||
description: |
|
||||
The SSL certificate of the instance {{ $labels.instance }} is expiring within 21 days.
|
||||
Actual time left: {{ $value | humanizeDuration }}.
|
||||
summary: SSL certificate will expire soon.
|
||||
expr: |
|
||||
probe_ssl_earliest_cert_expiry{job="blackbox-exporter"} - time() < 21 * 24 * 3600
|
||||
labels:
|
||||
severity: warning
|
947
assets/blackbox_exporter/dashboards/blackbox-exporter.json
Normal file
947
assets/blackbox_exporter/dashboards/blackbox-exporter.json
Normal file
|
@ -0,0 +1,947 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [ ],
|
||||
"description": "A dashboard that monitors the Blackbox-exporter. It is created using the blackbox-exporter-mixin for the [blackbox-exporter](https://github.com/prometheus/blackbox-exporter).",
|
||||
"editable": true,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"title": "Summary",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"links": [
|
||||
{
|
||||
"targetBlank": true,
|
||||
"title": "Go To Probe",
|
||||
"type": "link",
|
||||
"url": "d/blackbox-exporter-j4da/blackbox-exporter?var-instance=${__field.labels.instance}&var-job=${__field.labels.job}"
|
||||
}
|
||||
],
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"color": "red",
|
||||
"text": "Down"
|
||||
},
|
||||
"1": {
|
||||
"color": "green",
|
||||
"text": "Up"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 2,
|
||||
"maxDataPoints": 100,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
},
|
||||
"text": {
|
||||
"titleSize": 18,
|
||||
"valueSize": 18
|
||||
},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_success{\n job=~\"$job\"\n}\n",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"title": "Status Map",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0.001
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 6
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "count(\n probe_success{\n job=~\"$job\"\n }\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Probes",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 0.98999999999999999
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0.999
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 6
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "(\n count(\n probe_success{\n job=~\"$job\"\n } == 1\n )\n OR vector(0)\n) /\ncount(\n probe_success{\n job=~\"$job\"\n }\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Probes Success",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0.999
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 6
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "count(\n probe_http_ssl{\n job=~\"$job\"\n } == 1\n) /\ncount(\n probe_http_version{\n job=~\"$job\"\n }\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Probes SSL",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 6
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "avg(\n probe_duration_seconds{\n job=~\"$job\"\n }\n)\n"
|
||||
}
|
||||
],
|
||||
"title": "Probe Average Duration",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 7,
|
||||
"repeat": "instance",
|
||||
"title": "$instance",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 0.98999999999999999
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0.999
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"mean"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_success{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n"
|
||||
}
|
||||
],
|
||||
"title": "Uptime",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"color": "red",
|
||||
"text": "No"
|
||||
},
|
||||
"1": {
|
||||
"color": "green",
|
||||
"text": "Yes"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 3,
|
||||
"x": 0,
|
||||
"y": 15
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_success{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Probe Success",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "blue",
|
||||
"value": 300
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 400
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 500
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 3,
|
||||
"x": 3,
|
||||
"y": 15
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_http_status_code{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Latest Response Code",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"color": "red",
|
||||
"text": "No"
|
||||
},
|
||||
"1": {
|
||||
"color": "green",
|
||||
"text": "Yes"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 3,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"id": 11,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_http_ssl{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "SSL",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 3,
|
||||
"x": 3,
|
||||
"y": 18
|
||||
},
|
||||
"id": 12,
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
},
|
||||
"textMode": "name"
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_tls_version_info{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
|
||||
"instant": true,
|
||||
"legendFormat": "{{version}}"
|
||||
}
|
||||
],
|
||||
"title": "SSL Version",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 1814400
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 13,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none"
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_ssl_earliest_cert_expiry{\n job=~\"$job\",\n instance=~\"$instance\"\n} - time()\n"
|
||||
}
|
||||
],
|
||||
"title": "SSL Certificate Expiry",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"color": "green",
|
||||
"text": "No"
|
||||
},
|
||||
"1": {
|
||||
"color": "blue",
|
||||
"text": "Yes"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 3,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"id": 14,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_http_redirects{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"title": "Redirects",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 3,
|
||||
"x": 3,
|
||||
"y": 24
|
||||
},
|
||||
"id": 15,
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_http_version{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
|
||||
"instant": true,
|
||||
"legendFormat": "{{version}}"
|
||||
}
|
||||
],
|
||||
"title": "HTTP Version",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 3,
|
||||
"x": 0,
|
||||
"y": 27
|
||||
},
|
||||
"id": 16,
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"mean"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_duration_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n"
|
||||
}
|
||||
],
|
||||
"title": "Average Latency",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 3,
|
||||
"x": 3,
|
||||
"y": 27
|
||||
},
|
||||
"id": 17,
|
||||
"options": {
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"mean"
|
||||
]
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "probe_dns_lookup_time_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n"
|
||||
}
|
||||
],
|
||||
"title": "Average DNS Lookup Latency",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"spanNulls": false
|
||||
},
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 18,
|
||||
"x": 6,
|
||||
"y": 11
|
||||
},
|
||||
"id": 18,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n probe_http_duration_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n }\n) by (instance)\n",
|
||||
"legendFormat": "HTTP duration"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n probe_duration_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n }\n) by (instance)\n",
|
||||
"legendFormat": "Total probe duration"
|
||||
}
|
||||
],
|
||||
"title": "Probe Duration",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 100,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"mode": "percent"
|
||||
}
|
||||
},
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 18,
|
||||
"x": 6,
|
||||
"y": 21
|
||||
},
|
||||
"id": 19,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true,
|
||||
"sortBy": "Mean",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.4.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n probe_http_duration_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n }\n) by (phase)\n",
|
||||
"legendFormat": "{{ phase }}"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(\n probe_icmp_duration_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n }\n) by (phase)\n",
|
||||
"legendFormat": "{{ phase }}"
|
||||
}
|
||||
],
|
||||
"title": "Probe Phases",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 36,
|
||||
"tags": [
|
||||
"blackbox-exporter",
|
||||
"blackbox-exporter-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"label": "Data source",
|
||||
"name": "datasource",
|
||||
"query": "prometheus",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"query": "label_values(probe_success{}, job)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"includeAll": false,
|
||||
"label": "Instance",
|
||||
"multi": false,
|
||||
"name": "instance",
|
||||
"query": "label_values(probe_success{job=~\"$job\"}, instance)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-2d",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Blackbox Exporter",
|
||||
"uid": "blackbox-exporter-j4da"
|
||||
}
|
1
assets/blackbox_exporter/rules.yaml
Normal file
1
assets/blackbox_exporter/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
1
assets/caddy/alerts.yaml
Normal file
1
assets/caddy/alerts.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
761
assets/caddy/dashboards/caddy-overview.json
Normal file
761
assets/caddy/dashboards/caddy-overview.json
Normal file
|
@ -0,0 +1,761 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"limit": 100,
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": 13460,
|
||||
"graphTooltip": 0,
|
||||
"id": 10,
|
||||
"iteration": 1633116262227,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"log": 10,
|
||||
"type": "log"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(rate(caddy_http_requests_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (handler)",
|
||||
"interval": "",
|
||||
"legendFormat": "{{handler}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Requests",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(irate(caddy_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (code)",
|
||||
"interval": "",
|
||||
"legendFormat": "{{code}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Requests by Response Code",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "avg(avg_over_time(caddy_http_requests_in_flight{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (handler)",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "{{handler}}",
|
||||
"refId": "E"
|
||||
}
|
||||
],
|
||||
"title": "Requests In Flight",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 11
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(irate(caddy_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (code)",
|
||||
"interval": "",
|
||||
"legendFormat": "{{code}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Requests by Response Code (%)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"log": 2,
|
||||
"type": "log"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 20
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le))",
|
||||
"interval": "",
|
||||
"legendFormat": "p99",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum(rate(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le))",
|
||||
"interval": "",
|
||||
"legendFormat": "p95",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "histogram_quantile(0.90, sum(rate(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le))",
|
||||
"interval": "",
|
||||
"legendFormat": "p90",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "histogram_quantile(0.75, sum(rate(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le))",
|
||||
"interval": "",
|
||||
"legendFormat": "p75",
|
||||
"refId": "D"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "histogram_quantile(0.5, sum(rate(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le))",
|
||||
"interval": "",
|
||||
"legendFormat": "p50",
|
||||
"refId": "E"
|
||||
}
|
||||
],
|
||||
"title": "Request Duration (percentile)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"cards": { },
|
||||
"color": {
|
||||
"cardColor": "#b4ff00",
|
||||
"colorScale": "linear",
|
||||
"colorScheme": "interpolateInferno",
|
||||
"exponent": 0.5,
|
||||
"mode": "spectrum"
|
||||
},
|
||||
"dataFormat": "tsbuckets",
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
}
|
||||
}
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 20
|
||||
},
|
||||
"heatmap": { },
|
||||
"hideZeroBuckets": true,
|
||||
"highlightCards": true,
|
||||
"id": 6,
|
||||
"interval": "",
|
||||
"legend": {
|
||||
"show": true
|
||||
},
|
||||
"maxDataPoints": 25,
|
||||
"options": {
|
||||
"calculate": false,
|
||||
"calculation": { },
|
||||
"cellGap": 2,
|
||||
"cellValues": { },
|
||||
"color": {
|
||||
"exponent": 0.5,
|
||||
"fill": "#b4ff00",
|
||||
"mode": "scheme",
|
||||
"reverse": false,
|
||||
"scale": "exponential",
|
||||
"scheme": "Inferno",
|
||||
"steps": 128
|
||||
},
|
||||
"exemplars": {
|
||||
"color": "rgba(255,0,255,0.7)"
|
||||
},
|
||||
"filterValues": {
|
||||
"le": 1.0000000000000001e-09
|
||||
},
|
||||
"legend": {
|
||||
"show": true
|
||||
},
|
||||
"rowsFrame": {
|
||||
"layout": "auto"
|
||||
},
|
||||
"showValue": "never",
|
||||
"tooltip": {
|
||||
"show": true,
|
||||
"yHistogram": false
|
||||
},
|
||||
"yAxis": {
|
||||
"axisPlacement": "left",
|
||||
"reverse": false,
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"reverseYBuckets": false,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"expr": "sum(increase(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le)",
|
||||
"format": "heatmap",
|
||||
"interval": "",
|
||||
"legendFormat": "{{le}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Request Duration (heatmap)",
|
||||
"tooltip": {
|
||||
"show": true,
|
||||
"showHistogram": false
|
||||
},
|
||||
"type": "heatmap",
|
||||
"xAxis": {
|
||||
"show": true
|
||||
},
|
||||
"yAxis": {
|
||||
"format": "s",
|
||||
"logBase": 1,
|
||||
"show": true
|
||||
},
|
||||
"yBucketBound": "auto"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"caddy-integration"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Data source",
|
||||
"multi": false,
|
||||
"name": "datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"queryValue": "",
|
||||
"refresh": 1,
|
||||
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"definition": "",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(caddy_http_requests_total, job)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"uid": "$datasource"
|
||||
},
|
||||
"definition": "label_values(caddy_http_requests_total{job=~\"$job\"}, instance)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"qryType": 1,
|
||||
"query": "label_values(caddy_http_requests_total{job=~\"$job\"}, instance)",
|
||||
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Caddy Overview",
|
||||
"uid": "9B0qPnfMz",
|
||||
"version": 9
|
||||
}
|
1
assets/caddy/rules.yaml
Normal file
1
assets/caddy/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
226
assets/cilium-enterprise/alerts.yaml
Normal file
226
assets/cilium-enterprise/alerts.yaml
Normal file
|
@ -0,0 +1,226 @@
|
|||
groups:
|
||||
- name: Cilium Endpoints
|
||||
rules:
|
||||
- alert: CiliumAgentEndpointFailures
|
||||
annotations:
|
||||
description: Cilium Agent {{$labels.pod}} has endpoints that are in an invalid
|
||||
state. This may result in problems with scheduling Pods, or network connectivity
|
||||
issues.
|
||||
summary: Cilium Agent endpoints in the invalid state.
|
||||
expr: sum(cilium_endpoint_state{endpoint_state="invalid"}) by (pod)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CiliumAgentEndpointUpdateFailure
|
||||
annotations:
|
||||
description: |-
|
||||
API calls to Cilium Agent API to create or update Endpoints are failing on pod {{$labels.pod}} ({{$labels.method}} {{$labels.return_code}}).
|
||||
|
||||
This may cause problems for Pod scheduling
|
||||
summary: API calls to Cilium Agent API to create or update Endpoints are failing.
|
||||
expr: sum(rate(cilium_k8s_client_api_calls_total{method=~"(PUT|POST|PATCH)", endpoint="endpoint",return_code!~"2[0-9][0-9]"}[5m]))
|
||||
by (pod, method, return_code)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CiliumAgentContainerNetworkInterfaceApiErrorEndpointCreate
|
||||
annotations:
|
||||
description: |-
|
||||
Cilium Endpoint API endpoint rate limiter on Pod {{$labels.pod}} is reporting errors while doing endpoint create.
|
||||
This may cause CNI and prevent Cilium scheduling.
|
||||
summary: Cilium Endpoint API endpoint rate limiter is reporting errors while
|
||||
doing endpoint create.
|
||||
expr: sum(rate(cilium_api_limiter_processed_requests_total{api_call=~"endpoint-create",
|
||||
outcome="fail"}[1m])) by (pod, api_call)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: info
|
||||
- alert: CiliumAgentApiEndpointErrors
|
||||
annotations:
|
||||
description: |-
|
||||
API calls to Cilium Endpoints API on Agent Pod {{$labels.pod}} are failing due to server errors ({{$labels.return_code}}).
|
||||
|
||||
This could indicate issues with Cilium's ability to create endpoints, which can result in failure to schedule Kubernetes Pods.
|
||||
summary: API calls to Cilium Endpoints API are failing due to server errors.
|
||||
expr: sum(rate(cilium_agent_api_process_time_seconds_count{return_code=~"5[0-9][0-9]",
|
||||
path="/v1/endpoint"}[5m])) by (pod, return_code)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: Cilium IPAM
|
||||
rules:
|
||||
- alert: CiliumOperatorExhaustedIpamIps
|
||||
annotations:
|
||||
description: |-
|
||||
Cilium Operator {{$labels.pod}} has exhausted its IPAM IPs. This is a critical issue which may cause Pods to fail to be scheduled.
|
||||
|
||||
This may be caused by the number of Pods being scheduled exceeding your cloud platform's network limits or issues with Cilium rate limiting.
|
||||
summary: Cilium Operator has exhausted its IPAM IPs.
|
||||
expr: sum(cilium_operator_ipam_ips{type="available"}) by () <= 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: CiliumOperatorLowAvailableIpamIps
|
||||
annotations:
|
||||
description: |-
|
||||
Cilium Operator {{$labels.pod}} has used up over 90% of its available IPs. If available IPs become exhausted then the operator may not be able to schedule Pods.
|
||||
|
||||
This may be caused by the number of Pods being scheduled exceeding your cloud platform's network limits or issues with Cilium rate limiting.
|
||||
summary: Cilium Operator has used up over 90% of its available IPs.
|
||||
expr: (sum(cilium_operator_ipam_ips{type!="available"}) by () / sum(cilium_operator_ipam_ips)
|
||||
by ()) > 0.9
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CiliumOperatorEniIpamErrors
|
||||
annotations:
|
||||
description: |-
|
||||
Cilium Operator {{$labels.pod}} has high error rate while trying to create/attach ENIs for IPAM.
|
||||
|
||||
This may be caused by exceeding Node instance ENI/Address limits, as well as errors with Cilium Operator's cloud configuration.
|
||||
summary: Cilium Operator has high error rate while trying to create/attach ENIs
|
||||
for IPAM.
|
||||
expr: sum(rate(cilium_operator_ipam_interface_creation_ops{status=~"unable to
|
||||
(create|attach) ENI"}[5m])) by () / count(rate(cilium_operator_ipam_interface_creation_ops{status=~"unable
|
||||
to (create|attach) ENI"}[5m])) by () > 0.0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: Cilium Maps
|
||||
rules:
|
||||
- alert: CiliumAgentMapOperationFailures
|
||||
annotations:
|
||||
description: Cilium Agent {{$labels.pod}} is experiencing errors updating BPF
|
||||
maps on Agent Pod {{$labels.pod}}. Effects may vary depending on map type(s)
|
||||
being affected however this is likely to cause issues with Cilium.
|
||||
summary: Cilium Agent is experiencing errors updating BPF maps on Agent Pod.
|
||||
expr: sum(rate(cilium_bpf_map_ops_total{k8s_app="cilium", outcome="fail"}[5m]))
|
||||
by (map_name, pod) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CiliumAgentBpfMapPressure
|
||||
annotations:
|
||||
description: Map {{$labels.map_name}} on Cilium Agent Pod is currently experiencing
|
||||
high map pressure. The map is currently over 90% full. Full maps will begin
|
||||
to experience errors on updates which may result in unexpected behaviour.
|
||||
summary: Map on Cilium Agent Pod is currently experiencing high map pressure.
|
||||
expr: cilium_bpf_map_pressure{} > 0.9
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: Cilium NAT
|
||||
rules:
|
||||
- alert: CiliumAgentNatTableFull
|
||||
annotations:
|
||||
description: |-
|
||||
Cilium Agent Pod {{$labels.pod}} is dropping packets due to "No mapping for NAT masquerade" errors. This likely means that the Cilium agent's NAT table is full.
|
||||
This is a potentially critical issue that can lead to connection issues for packets leaving the cluster network.
|
||||
|
||||
See: https://docs.cilium.io/en/v1.9/concepts/networking/masquerading/ for more info.
|
||||
summary: Cilium Agent Pod is dropping packets due to "No mapping for NAT masquerade"
|
||||
errors.
|
||||
expr: sum(rate(cilium_drop_count_total{reason="No mapping for NAT masquerade"}[1m]))
|
||||
by (pod) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: Cilium API
|
||||
rules:
|
||||
- alert: CiliumAgentApiHighErrorRate
|
||||
annotations:
|
||||
description: 'Cilium Agent API on Pod {{$labels.pod}} is experiencing a high
|
||||
error rate for response code: {{$labels.return_code}} on endpoint {{$labels.endpoint}}.'
|
||||
summary: Cilium Agent API on Pod is experiencing a high error rate.
|
||||
expr: sum(rate(cilium_k8s_client_api_calls_total{endpoint!="metrics",return_code!~"2[0-9][0-9]"}[5m]))
|
||||
by (pod, endpoint, return_code)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: info
|
||||
- name: Cilium Conntrack
|
||||
rules:
|
||||
- alert: CiliumAgentConntrackTableFull
|
||||
annotations:
|
||||
description: |-
|
||||
Cilium's conntrack map is failing on new insertions on agent Pod {{$labels.pod}}, this likely means that the conntrack BPF map is full. This is a potentially critical issue and may result in unexpected packet drops.
|
||||
|
||||
If this is firing, it is recommended to look at both the CPU/memory resource utilization dashboards and the conntrack GC run dashboards for more details on what the issue is.
|
||||
summary: Cilium's conntrack map is failing on new insertions on Agent Pod.
|
||||
expr: 'sum(rate(cilium_drop_count_total{reason="CT: Map insertion failed"}[5m]))
|
||||
by (pod) > 0'
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: CiliumAgentConnTrackFailedGarbageCollectorRuns
|
||||
annotations:
|
||||
description: |-
|
||||
Cilium Agent Conntrack GC runs on Agent Pod {{$labels.pod}} have been reported as not completing. Runs reported "uncompleted" may indicate a problem with ConnTrack GC.
|
||||
Cilium failing to GC its ConnTrack table may cause further ConnTrack issues later. This may result in dropped packets or other issues.
|
||||
summary: Cilium Agent Conntrack GC runs are failing on Agent Pod.
|
||||
expr: sum(rate(cilium_datapath_conntrack_gc_runs_total{status="uncompleted"}[5m]))
|
||||
by (pod) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: Cilium Drops
|
||||
rules:
|
||||
- alert: CiliumAgentHighDeniedRate
|
||||
annotations:
|
||||
description: Cilium Agent Pod {{$labels.pod}} is experiencing a high drop rate
|
||||
due to policy rule denies. This could mean that a network policy is not configured
|
||||
correctly, or that a Pod is sending unexpected network traffic
|
||||
summary: Cilium Agent is experiencing a high drop rate due to policy rule denies.
|
||||
expr: sum(rate(cilium_drop_count_total{reason="Policy denied"}[1m])) by (reason,
|
||||
pod) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: info
|
||||
- name: Cilium Policy
|
||||
rules:
|
||||
- alert: CiliumAgentPolicyMapPressure
|
||||
annotations:
|
||||
description: 'Cilium Agent {{$labels.pod}} is experiencing high BPF map pressure
|
||||
(over 90% full) on policy map: {{$labels.map_name}}. This means that the map
|
||||
is running low on capacity. A full policy map may result in packet drops.'
|
||||
summary: Cilium Agent is experiencing high BPF map pressure.
|
||||
expr: sum(cilium_bpf_map_pressure{map_name=~"cilium_policy_.*"}) by (pod) > 0.9
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: Cilium Identity
|
||||
rules:
|
||||
- alert: CiliumNodeLocalHighIdentityAllocation
|
||||
annotations:
|
||||
description: |-
|
||||
Cilium agent Pod {{$labels.pod}} is using a very high percent (over 80%) of its maximum per-node identity limit (65535).
|
||||
|
||||
If this capacity is exhausted Cilium may be unable to allocate new identities. Very high identity allocations can also indicate other problems
|
||||
summary: Cilium is using a very high percent (over 80%) of its maximum per-node
|
||||
identity limit (65535).
|
||||
expr: (sum(cilium_identity{type="node_local"}) by (pod) / (2^16-1)) > 0.8
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: RunningOutOfCiliumClusterIdentities
|
||||
annotations:
|
||||
description: Cilium is using a very high percent of its maximum cluster identity
|
||||
limit ({{ $value | humanizePercentage }} of 65280). If this capacity is exhausted Cilium may be unable
|
||||
to allocate new identities. Very high identity allocations can also indicate
|
||||
other problems
|
||||
summary: Cilium is using a very high percent of its maximum cluster identity
|
||||
limit (65280).
|
||||
expr: sum(cilium_identity{type="cluster_local"}) by () / (2^16-256) > .8
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: Cilium Nodes
|
||||
rules:
|
||||
- alert: CiliumUnreachableNodes
|
||||
annotations:
|
||||
description: Cilium Agent {{$labels.pod}} is reporting unreachable Nodes in
|
||||
the cluster.
|
||||
summary: Cilium Agent is reporting unreachable Nodes in the cluster.
|
||||
expr: sum(cilium_unreachable_nodes{}) by (pod) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
1217
assets/cilium-enterprise/dashboards/cilium-L3-policy.json
Normal file
1217
assets/cilium-enterprise/dashboards/cilium-L3-policy.json
Normal file
File diff suppressed because it is too large
Load diff
1088
assets/cilium-enterprise/dashboards/cilium-L7-proxy.json
Normal file
1088
assets/cilium-enterprise/dashboards/cilium-L7-proxy.json
Normal file
File diff suppressed because it is too large
Load diff
1690
assets/cilium-enterprise/dashboards/cilium-agent-overview.json
Normal file
1690
assets/cilium-enterprise/dashboards/cilium-agent-overview.json
Normal file
File diff suppressed because it is too large
Load diff
1715
assets/cilium-enterprise/dashboards/cilium-agent.json
Normal file
1715
assets/cilium-enterprise/dashboards/cilium-agent.json
Normal file
File diff suppressed because it is too large
Load diff
657
assets/cilium-enterprise/dashboards/cilium-api.json
Normal file
657
assets/cilium-enterprise/dashboards/cilium-api.json
Normal file
|
@ -0,0 +1,657 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 16611,
|
||||
"graphTooltip": 1,
|
||||
"id": 3,
|
||||
"iteration": 1664184399070,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-overview"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Overviews",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
},
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Components",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"log": 10,
|
||||
"type": "log"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Max per node processingTime"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#e24d42",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Max per node upstreamTime"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#58140c",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "avg(cilium_policy_l7_parse_errors_total{pod=~\"cilium.*\"})"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#bf1b00",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "parse errors"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#bf1b00",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Max per node processingTime"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Max per node upstreamTime"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "avg(cilium_policy_l7_parse_errors_total{pod=~\"cilium.*\"})"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "parse errors"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 10,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 94,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "quantile(0.95, rate(cilium_proxy_upstream_reply_seconds_sum{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval]) / rate(cilium_proxy_upstream_reply_seconds_count{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (scope)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{scope}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "P95 Proxy Response Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Max per node processingTime"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#e24d42",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Max per node upstreamTime"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#58140c",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "avg(cilium_policy_l7_parse_errors_total{pod=~\"cilium.*\"})"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#bf1b00",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "parse errors"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#bf1b00",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Max per node processingTime"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Max per node upstreamTime"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "avg(cilium_policy_l7_parse_errors_total{pod=~\"cilium.*\"})"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "parse errors"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 10,
|
||||
"x": 10,
|
||||
"y": 0
|
||||
},
|
||||
"id": 249,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "avg(rate(cilium_proxy_upstream_reply_seconds_sum{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval]) / rate(cilium_proxy_upstream_reply_seconds_count{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (scope)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{scope}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Avg Proxy Response Time",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 36,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Data Source",
|
||||
"multi": false,
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version, cluster)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version, cluster)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": "cilium.*",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": false,
|
||||
"name": "pod",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "top k",
|
||||
"name": "k",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
}
|
||||
],
|
||||
"query": "10",
|
||||
"skipUrlSync": false,
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Cilium / Components / API",
|
||||
"uid": "integrations_cilium_ent_api",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
792
assets/cilium-enterprise/dashboards/cilium-bpf.json
Normal file
792
assets/cilium-enterprise/dashboards/cilium-bpf.json
Normal file
|
@ -0,0 +1,792 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 16611,
|
||||
"graphTooltip": 1,
|
||||
"id": 3,
|
||||
"iteration": 1664184399070,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-overview"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Overviews",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
},
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Components",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"log": 10,
|
||||
"type": "log"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 262,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (map_name)",
|
||||
"hide": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "BPF Map Operations",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"log": 10,
|
||||
"type": "log"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 286,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", pod=~\"$pod\", operation=\"delete\"}[$__rate_interval])) by (map_name)",
|
||||
"hide": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "BPF Map Deletes",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"log": 10,
|
||||
"type": "log"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 285,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", pod=~\"$pod\", operation=\"update\"}[$__rate_interval])) by (map_name)",
|
||||
"hide": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "BPF Map Updates",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "Errors/Minute"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 15
|
||||
},
|
||||
"id": 244,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "9.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", outcome=\"fail\",pod=~\"$pod\"}[$__rate_interval])) * 60",
|
||||
"hide": false,
|
||||
"legendFormat": "{{outcome}}",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "BPF Map Operation Error Rate",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 11,
|
||||
"x": 6,
|
||||
"y": 15
|
||||
},
|
||||
"id": 287,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.5-0100a6a",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk($k, sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", outcome=\"fail\",pod=~\"$pod\"}[$__rate_interval])) by (endpoint, pod, operation))",
|
||||
"hide": false,
|
||||
"legendFormat": "{{pod}} {{operation}} {{endpoint}}",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Top BPF Map Operation Failures",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"log": 10,
|
||||
"type": "log"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 7,
|
||||
"x": 17,
|
||||
"y": 15
|
||||
},
|
||||
"id": 243,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (operation, outcome)",
|
||||
"hide": false,
|
||||
"legendFormat": "{{operation}}: {{outcome}}",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Map Operation Outcomes",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 36,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Data Source",
|
||||
"multi": false,
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version, cluster)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version, cluster)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": "cilium.*",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": false,
|
||||
"name": "pod",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "top k",
|
||||
"name": "k",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
}
|
||||
],
|
||||
"query": "10",
|
||||
"skipUrlSync": false,
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Cilium / Components / BPF",
|
||||
"uid": "integrations_cilium_ent_bpf",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
1229
assets/cilium-enterprise/dashboards/cilium-conntrack.json
Normal file
1229
assets/cilium-enterprise/dashboards/cilium-conntrack.json
Normal file
File diff suppressed because it is too large
Load diff
939
assets/cilium-enterprise/dashboards/cilium-datapath.json
Normal file
939
assets/cilium-enterprise/dashboards/cilium-datapath.json
Normal file
|
@ -0,0 +1,939 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 16611,
|
||||
"graphTooltip": 1,
|
||||
"id": 3,
|
||||
"iteration": 1664184399070,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-overview"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Overviews",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
},
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Components",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "dump_interrupts conntrack ipv4"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#ea6460",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "dump_interrupts conntrack ipv6"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#58140c",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 79,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk($k, sum(rate(cilium_datapath_conntrack_dump_resets_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (area, family, name, pod))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pod}} {{name}} {{area}} {{family}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Datapath Conntrack Dump Resets",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": -1,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 0,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"log": 10,
|
||||
"type": "log"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "disconnecting"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#614d93",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "ready"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "rgba(81, 220, 95, 0.52)",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "waiting-to-regenerate"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#0a50a1",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 51,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(cilium_endpoint_state{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}) by (endpoint_state)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{endpoint_state}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Cilium Endpoint State",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 106,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(cilium_services_events_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod, action)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{action}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Service Updates",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "purple",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 33,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"minVizHeight": 10,
|
||||
"minVizWidth": 0,
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": true
|
||||
},
|
||||
"pluginVersion": "9.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(cilium_policy_endpoint_enforcement_status{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}) by (enforcement)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"instant": true,
|
||||
"interval": "1s",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{enforcement}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Endpoints Policy Enforcement Status",
|
||||
"type": "bargauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "opm"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "fail"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#bf1b00",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "fail/min"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#890f02",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "success"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#447ebc",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "success/min"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#3f6833",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 17
|
||||
},
|
||||
"id": 49,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(cilium_endpoint_regenerations_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by(outcome)",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{outcome}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Endpoint Regenerations",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "normal"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 17
|
||||
},
|
||||
"id": 55,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean"
|
||||
],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "quantile(0.95, rate(cilium_endpoint_regeneration_time_stats_seconds_sum{pod=~\"$pod\"}[$__rate_interval]) / rate(cilium_endpoint_regeneration_time_stats_seconds_count{pod=~\"$pod\"}[$__rate_interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pod}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "P95 Endpoint Regeneration Time",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 36,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Data Source",
|
||||
"multi": false,
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version, cluster)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version, cluster)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": "cilium.*",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": false,
|
||||
"name": "pod",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "top k",
|
||||
"name": "k",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
}
|
||||
],
|
||||
"query": "10",
|
||||
"skipUrlSync": false,
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Cilium / Components / Datapath",
|
||||
"uid": "integrations_cilium_ent_datapath",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
1599
assets/cilium-enterprise/dashboards/cilium-external-fqdn-proxy.json
Normal file
1599
assets/cilium-enterprise/dashboards/cilium-external-fqdn-proxy.json
Normal file
File diff suppressed because it is too large
Load diff
1082
assets/cilium-enterprise/dashboards/cilium-fqdn-proxy.json
Normal file
1082
assets/cilium-enterprise/dashboards/cilium-fqdn-proxy.json
Normal file
File diff suppressed because it is too large
Load diff
417
assets/cilium-enterprise/dashboards/cilium-identities.json
Normal file
417
assets/cilium-enterprise/dashboards/cilium-identities.json
Normal file
|
@ -0,0 +1,417 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 16611,
|
||||
"graphTooltip": 1,
|
||||
"id": 3,
|
||||
"iteration": 1664184399070,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-overview"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Overviews",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
},
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Components",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 53,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "percentage",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "Identities"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 11,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 259,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.5-0100a6a",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(cilium_identity{pod=~\"$pod\"}) by (type)",
|
||||
"hide": false,
|
||||
"legendFormat": "{{type}}",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Allocated Identities",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Number of identities allocated by type.\n\nLimits for identity allocations are:\n\nMax(cluster_local)=65280\nMax(node_local)=65535.\n\nRunning out of identities is a potentially critical issue.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": 60000
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 65280
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "Identities"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 11,
|
||||
"w": 13,
|
||||
"x": 11,
|
||||
"y": 0
|
||||
},
|
||||
"id": 304,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "9.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(cilium_identity{pod=~\"$pod\"}) by (type)",
|
||||
"hide": false,
|
||||
"legendFormat": "{{type}}",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(cilium_identity{pod=~\"$pod\"})",
|
||||
"hide": false,
|
||||
"legendFormat": "total",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Allocated Identities",
|
||||
"type": "stat"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 36,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Data Source",
|
||||
"multi": false,
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version, cluster)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version, cluster)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": "cilium.*",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": false,
|
||||
"name": "pod",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "top k",
|
||||
"name": "k",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
}
|
||||
],
|
||||
"query": "10",
|
||||
"skipUrlSync": false,
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Cilium / Components / Identities",
|
||||
"uid": "integrations_cilium_ent_identities",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
1582
assets/cilium-enterprise/dashboards/cilium-kubernetes.json
Normal file
1582
assets/cilium-enterprise/dashboards/cilium-kubernetes.json
Normal file
File diff suppressed because it is too large
Load diff
433
assets/cilium-enterprise/dashboards/cilium-network.json
Normal file
433
assets/cilium-enterprise/dashboards/cilium-network.json
Normal file
|
@ -0,0 +1,433 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 16611,
|
||||
"graphTooltip": 1,
|
||||
"id": 3,
|
||||
"iteration": 1664184399070,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-overview"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Overviews",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
},
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Components",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "ipv4"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#5195ce",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "ipv6"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#6d1f62",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 87,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "9.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(cilium_ip_addresses{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}) by (family)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{family}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Allocated Addresses",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 89,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "topk($k, sum(cilium_unreachable_health_endpoints{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}) by (pod))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pod}}",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Reported Unreachable Health Endpoints",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 36,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Data Source",
|
||||
"multi": false,
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version, cluster)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version, cluster)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": "cilium.*",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": false,
|
||||
"name": "pod",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "top k",
|
||||
"name": "k",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
}
|
||||
],
|
||||
"query": "10",
|
||||
"skipUrlSync": false,
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Cilium / Components / Network",
|
||||
"uid": "integrations_cilium_ent_network",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
586
assets/cilium-enterprise/dashboards/cilium-nodes.json
Normal file
586
assets/cilium-enterprise/dashboards/cilium-nodes.json
Normal file
|
@ -0,0 +1,586 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": 16611,
|
||||
"graphTooltip": 1,
|
||||
"id": 3,
|
||||
"iteration": 1664184399070,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-overview"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Overviews",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
},
|
||||
{
|
||||
"asDropdown": true,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Cilium Components",
|
||||
"tooltip": "",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 226,
|
||||
"panels": [ ],
|
||||
"title": "Cilium Nodes",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 35,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Avg"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "#cca300",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Max"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "rgb(167, 150, 111)",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Max"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.fillBelowTo",
|
||||
"value": "Min"
|
||||
},
|
||||
{
|
||||
"id": "custom.lineWidth",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Min"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.lineWidth",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "add k8s"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "delete k8s"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "update k8s"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "add local-node"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "short"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 12,
|
||||
"w": 18,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 93,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.3-e1f2f3c",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "avg(rate(cilium_nodes_all_events_received_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (event_type, source)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{event_type}} {{source}}",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Node Events",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "blue",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 1
|
||||
},
|
||||
"id": 91,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "9.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(cilium_nodes_all_num{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Nodes",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Cilium Nodes",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"links": [ ],
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 7
|
||||
},
|
||||
"id": 218,
|
||||
"links": [ ],
|
||||
"options": {
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showThresholdLabels": false,
|
||||
"showThresholdMarkers": true
|
||||
},
|
||||
"pluginVersion": "9.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(cilium_unreachable_nodes{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pod}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Unreachable Cilium Nodes",
|
||||
"type": "gauge"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 36,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"cilium-agent"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Data Source",
|
||||
"multi": false,
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version, cluster)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version, cluster)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": "cilium.*",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": false,
|
||||
"name": "pod",
|
||||
"options": [ ],
|
||||
"query": {
|
||||
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "top k",
|
||||
"name": "k",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "10",
|
||||
"value": "10"
|
||||
}
|
||||
],
|
||||
"query": "10",
|
||||
"skipUrlSync": false,
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Cilium / Components / Nodes",
|
||||
"uid": "integrations_cilium_ent_nodes",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
4420
assets/cilium-enterprise/dashboards/cilium-operator.json
Normal file
4420
assets/cilium-enterprise/dashboards/cilium-operator.json
Normal file
File diff suppressed because it is too large
Load diff
1751
assets/cilium-enterprise/dashboards/cilium-overview.json
Normal file
1751
assets/cilium-enterprise/dashboards/cilium-overview.json
Normal file
File diff suppressed because it is too large
Load diff
1751
assets/cilium-enterprise/dashboards/cilium-policy.json
Normal file
1751
assets/cilium-enterprise/dashboards/cilium-policy.json
Normal file
File diff suppressed because it is too large
Load diff
1152
assets/cilium-enterprise/dashboards/cilium-resource-utilization.json
Normal file
1152
assets/cilium-enterprise/dashboards/cilium-resource-utilization.json
Normal file
File diff suppressed because it is too large
Load diff
3349
assets/cilium-enterprise/dashboards/hubble-overview.json
Normal file
3349
assets/cilium-enterprise/dashboards/hubble-overview.json
Normal file
File diff suppressed because it is too large
Load diff
1162
assets/cilium-enterprise/dashboards/hubble-timescape.json
Normal file
1162
assets/cilium-enterprise/dashboards/hubble-timescape.json
Normal file
File diff suppressed because it is too large
Load diff
1
assets/cilium-enterprise/rules.yaml
Normal file
1
assets/cilium-enterprise/rules.yaml
Normal file
|
@ -0,0 +1 @@
|
|||
null
|
44
assets/clickhouse/alerts.yaml
Normal file
44
assets/clickhouse/alerts.yaml
Normal file
|
@ -0,0 +1,44 @@
|
|||
groups:
|
||||
- name: ClickHouseAlerts
|
||||
rules:
|
||||
- alert: ClickHouseReplicationQueueBackingUp
|
||||
annotations:
|
||||
description: |
|
||||
ClickHouse replication tasks are processing slower than expected on {{ $labels.instance }}, causing the replication queue size to back up at {{ $value }}, exceeding the threshold value of 99.
|
||||
summary: ClickHouse replica max queue size backing up.
|
||||
expr: |
|
||||
ClickHouseAsyncMetrics_ReplicasMaxQueueSize > 99
|
||||
for: 5m
|
||||
keep_firing_for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: ClickHouseRejectedInserts
|
||||
annotations:
|
||||
description: ClickHouse inserts are being rejected on {{ $labels.instance }}
|
||||
as items are being inserted faster than ClickHouse is able to merge them.
|
||||
summary: ClickHouse has too many rejected inserts.
|
||||
expr: ClickHouseProfileEvents_RejectedInserts > 1
|
||||
for: 5m
|
||||
keep_firing_for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ClickHouseZookeeperSessions
|
||||
annotations:
|
||||
description: |
|
||||
ClickHouse has more than one connection to a Zookeeper on {{ $labels.instance }}, which can lead to bugs due to stale reads in Zookeeper's consistency model.
|
||||
summary: ClickHouse has too many Zookeeper sessions.
|
||||
expr: ClickHouseMetrics_ZooKeeperSession > 1
|
||||
for: 5m
|
||||
keep_firing_for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: ClickHouseReplicasInReadOnly
|
||||
annotations:
|
||||
description: |
|
||||
ClickHouse has replicas in a read only state on {{ $labels.instance }} after losing connection to Zookeeper or at startup.
|
||||
summary: ClickHouse has too many replicas in read only state.
|
||||
expr: ClickHouseMetrics_ReadonlyReplica > 0
|
||||
for: 5m
|
||||
keep_firing_for: 5m
|
||||
labels:
|
||||
severity: critical
|
616
assets/clickhouse/dashboards/clickhouse-latency.json
Normal file
616
assets/clickhouse/dashboards/clickhouse-latency.json
Normal file
|
@ -0,0 +1,616 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [ ],
|
||||
"annotations": {
|
||||
"list": [ ]
|
||||
},
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 1,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"clickhouse-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other ClickHouse dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Time spent waiting for read syscall",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "µs"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"expr": "increase(ClickHouseProfileEvents_DiskReadElapsedMicroseconds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
|
||||
"legendFormat": "{{ instance }} - disk read elapsed",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk read latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Time spent waiting for write syscall",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "µs"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"expr": "increase(ClickHouseProfileEvents_DiskWriteElapsedMicroseconds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
|
||||
"legendFormat": "{{ instance }} - disk write elapsed",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Disk write latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Latency of inbound network traffic",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "µs"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"expr": "increase(ClickHouseProfileEvents_NetworkReceiveElapsedMicroseconds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
|
||||
"legendFormat": "{{ instance }} - network receive elapsed",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Network receive latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Latency of outbound network traffic",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "µs"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"expr": "increase(ClickHouseProfileEvents_NetworkSendElapsedMicroseconds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
|
||||
"legendFormat": "{{ instance }} - network send elapsed",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Network transmit latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"description": "Time spent waiting for ZooKeeper request to process",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [ ],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "µs"
|
||||
},
|
||||
"overrides": [ ]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [ ],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"editorMode": "builder",
|
||||
"expr": "increase(ClickHouseProfileEvents_ZooKeeperWaitMicroseconds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
|
||||
"legendFormat": "{{ instance }} - ZooKeeper wait",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "ZooKeeper wait time",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"clickhouse-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": { },
|
||||
"hide": 0,
|
||||
"label": "Data source",
|
||||
"name": "prometheus_datasource",
|
||||
"options": [ ],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(ClickHouseProfileEvents_DiskReadElapsedMicroseconds,job)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "instance",
|
||||
"multi": false,
|
||||
"name": "instance",
|
||||
"options": [ ],
|
||||
"query": "label_values(ClickHouseProfileEvents_DiskReadElapsedMicroseconds{job=~\"$job\"}, instance)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": "",
|
||||
"current": { },
|
||||
"datasource": {
|
||||
"uid": "${prometheus_datasource}"
|
||||
},
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "Cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ClickHouseProfileEvents_DiskReadElapsedMicroseconds{job=~\"$job\"}, cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "default",
|
||||
"title": "ClickHouse latency",
|
||||
"uid": "clickhouse-latency",
|
||||
"version": 0
|
||||
}
|
295
assets/clickhouse/dashboards/clickhouse-logs.json
Normal file
295
assets/clickhouse/dashboards/clickhouse-logs.json
Normal file
|
@ -0,0 +1,295 @@
|
|||
{
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": false,
|
||||
"icon": "external link",
|
||||
"includeVars": true,
|
||||
"keepTime": true,
|
||||
"tags": [
|
||||
"clickhouse-mixin"
|
||||
],
|
||||
"targetBlank": false,
|
||||
"title": "Other ClickHouse dashboards",
|
||||
"type": "dashboards",
|
||||
"url": ""
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"description": "Logs volume grouped by \"level\" label.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 50,
|
||||
"stacking": {
|
||||
"mode": "normal"
|
||||
}
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "purple",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(E|e)(rr.*|RR.*)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "red",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(W|w)(arn.*|ARN.*|rn|RN)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "orange",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "green",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "(T|t)(race|RACE)"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "light-blue",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "logs"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"fixedColor": "text",
|
||||
"mode": "fixed"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 24
|
||||
},
|
||||
"id": 1,
|
||||
"interval": "30s",
|
||||
"options": {
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "v10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "sum by (level) (count_over_time({job=~\".*/clickhouse.*\",job=~\"$job\",instance=~\"$instance\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__interval]))\n",
|
||||
"legendFormat": "{{ level }}"
|
||||
}
|
||||
],
|
||||
"title": "Logs volume",
|
||||
"transformations": [
|
||||
{
|
||||
"id": "renameByRegex",
|
||||
"options": {
|
||||
"regex": "Value",
|
||||
"renamePattern": "logs"
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "-- Mixed --"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 18,
|
||||
"w": 24
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"dedupStrategy": "exact",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": true,
|
||||
"showTime": false,
|
||||
"wrapLogMessage": true
|
||||
},
|
||||
"pluginVersion": "v10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"expr": "{job=~\".*/clickhouse.*\",job=~\"$job\",instance=~\"$instance\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n"
|
||||
}
|
||||
],
|
||||
"title": "Logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"schemaVersion": 36,
|
||||
"tags": [
|
||||
"clickhouse-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"label": "Loki data source",
|
||||
"name": "loki_datasource",
|
||||
"query": "loki",
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"query": "label_values({job=~\".*/clickhouse.*\"}, job)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Instance",
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"query": "label_values({job=~\".*/clickhouse.*\",job=~\"$job\"}, instance)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "${loki_datasource}"
|
||||
},
|
||||
"includeAll": true,
|
||||
"label": "Level",
|
||||
"multi": true,
|
||||
"name": "level",
|
||||
"query": "label_values({job=~\".*/clickhouse.*\",job=~\"$job\",instance=~\"$instance\"}, level)",
|
||||
"refresh": 2,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"label": "Regex search",
|
||||
"name": "regex_search",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"type": "textbox"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "ClickHouse logs",
|
||||
"uid": "clickhouse-logs-overview"
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue