1
0
Fork 0
mirror of https://github.com/monitoring-mixins/website.git synced 2024-12-14 11:37:31 +00:00

Merge pull request #41 from v-zhuravlev/jsonnet-ilbs

Add jsonnet-libs mixins
This commit is contained in:
Ryan Geyer 2024-10-23 23:43:25 -07:00 committed by GitHub
commit 25d0e39bb9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
444 changed files with 308117 additions and 23994 deletions

53
assets/MSSQL/alerts.yaml Normal file
View file

@ -0,0 +1,53 @@
# Prometheus alerting rules for Microsoft SQL Server (MSSQL).
# Every rule must hold continuously for 5 minutes (`for: 5m`) before firing.
# The IO stall rules multiply a `*_seconds_total` counter increase by 1000 so
# that the compared value (and the value printed in the description) is in ms.
groups:
  - name: MSSQLAlerts
    rules:
      # More than 10 deadlocks counted over the trailing 5-minute window.
      - alert: MSSQLHighNumberOfDeadlocks
        annotations:
          description: '{{ printf "%.2f" $value }} deadlocks have occurred over the last
            5 minutes on {{$labels.instance}}, which is above threshold of 10 deadlocks.'
          summary: There are deadlocks occurring in the database.
        expr: |
          increase(mssql_deadlocks_total{}[5m]) > 10
        for: 5m
        labels:
          severity: warning
      # Read stall above 200ms over 5 minutes (warning tier).
      - alert: MSSQLModerateReadStallTime
        annotations:
          description: '{{ printf "%.2f" $value }}ms of IO read stall has occurred on
            {{$labels.instance}}, which is above threshold of 200ms.'
          summary: There is a moderate amount of IO stall for database reads.
        expr: |
          1000 * increase(mssql_io_stall_seconds_total{operation="read"}[5m]) > 200
        for: 5m
        labels:
          severity: warning
      # Read stall above 400ms over 5 minutes (critical tier).
      - alert: MSSQLHighReadStallTime
        annotations:
          description: '{{ printf "%.2f" $value }}ms of IO read stall has occurred on
            {{$labels.instance}}, which is above threshold of 400ms.'
          summary: There is a high amount of IO stall for database reads.
        expr: |
          1000 * increase(mssql_io_stall_seconds_total{operation="read"}[5m]) > 400
        for: 5m
        labels:
          severity: critical
      # Write stall above 200ms over 5 minutes (warning tier).
      - alert: MSSQLModerateWriteStallTime
        annotations:
          description: '{{ printf "%.2f" $value }}ms of IO write stall has occurred on
            {{$labels.instance}}, which is above threshold of 200ms.'
          summary: There is a moderate amount of IO stall for database writes.
        expr: |
          1000 * increase(mssql_io_stall_seconds_total{operation="write"}[5m]) > 200
        for: 5m
        labels:
          severity: warning
      # Write stall above 400ms over 5 minutes (critical tier).
      - alert: MSSQLHighWriteStallTime
        annotations:
          description: '{{ printf "%.2f" $value }}ms of IO write stall has occurred on
            {{$labels.instance}}, which is above threshold of 400ms.'
          summary: There is a high amount of IO stall for database writes.
        expr: |
          1000 * increase(mssql_io_stall_seconds_total{operation="write"}[5m]) > 400
        for: 5m
        labels:
          severity: critical

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,515 @@
{
"__inputs": [ ],
"__requires": [ ],
"annotations": {
"list": [ ]
},
"description": "An overview of MSSQL paging metrics.",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"mssql-mixin"
],
"targetBlank": false,
"title": "Other MSSQL dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Memory used for the OS page file.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 50,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "bytes"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "mssql_os_page_file{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{state}}"
}
],
"title": "Page file memory",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Percentage of page found and read from the SQL Server buffer cache.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 3,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "mssql_buffer_cache_hit_ratio{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}"
}
],
"title": "Buffer cache hit percentage",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Rate of page checkpoints per second.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "checkpoints/s"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 4,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "mssql_checkpoint_pages_sec{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}"
}
],
"title": "Page checkpoints",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The number of page faults that were incurred by the SQL Server process.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "faults"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 5,
"interval": "1m",
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(mssql_page_fault_count_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}"
}
],
"title": "Page faults",
"type": "timeseries"
}
],
"refresh": "1m",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"mssql-mixin"
],
"templating": {
"list": [
{
"current": { },
"hide": 0,
"label": "Data Source",
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(mssql_build_info{}, job)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".*",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 2,
"includeAll": true,
"label": "Cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(mssql_build_info{job=~\"$job\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Instance",
"multi": true,
"name": "instance",
"options": [ ],
"query": "label_values(mssql_build_info{job=~\"$job\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "default",
"title": "MSSQL pages",
"uid": "mssql-pages",
"version": 0
}

1
assets/MSSQL/rules.yaml Normal file
View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,95 @@
# Prometheus alerting rules for Aerospike.
# Every rule must hold continuously for 5 minutes (`for: 5m`) before firing.
# All expressions aggregate with `sum without(service)`, i.e. the `service`
# label is dropped and every other label is kept on the resulting series.
groups:
  - name: aerospike
    rules:
      # Used system memory, derived as 100 minus the reported free percentage.
      - alert: AerospikeNodeHighMemoryUsage
        annotations:
          description: '{{ printf "%.0f" $value }} percent of system memory used on node
            {{$labels.instance}} on cluster {{$labels.aerospike_cluster}}, which is above
            the threshold of 80.'
          summary: There is a limited amount of memory available for a node.
        expr: |
          100 - sum without (service) (aerospike_node_stats_system_free_mem_pct) >= 80
        for: 5m
        labels:
          severity: critical
      # Used device space, derived as 100 minus the reported free percentage;
      # the description therefore reports space *used*, not space available.
      - alert: AerospikeNamespaceHighDiskUsage
        annotations:
          description: '{{ printf "%.0f" $value }} percent of disk space used for
            namespace {{$labels.ns}} on node {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}},
            which is above the threshold of 80.'
          summary: There is a limited amount of disk space available for a node.
        expr: |
          100 - sum without (service) (aerospike_namespace_device_free_pct) >= 80
        for: 5m
        labels:
          severity: critical
      # Any unavailable partition at all.
      - alert: AerospikeUnavailablePartitions
        annotations:
          description: '{{ printf "%.0f" $value }} unavailable partition(s) in namespace
            {{$labels.ns}}, on node {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}},
            which is above the threshold of 0.'
          summary: There are unavailable partitions in the Aerospike cluster.
        expr: |
          sum without(service) (aerospike_namespace_unavailable_partitions) > 0
        for: 5m
        labels:
          severity: critical
      # Any dead partition at all.
      - alert: AerospikeDeadPartitions
        annotations:
          description: '{{ printf "%.0f" $value }} dead partition(s) in namespace {{$labels.ns}},
            on node {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}}, which
            is above the threshold of 0.'
          summary: There are dead partitions in the Aerospike cluster.
        expr: |
          sum without(service) (aerospike_namespace_dead_partitions) > 0
        for: 5m
        labels:
          severity: critical
      # Either stop-writes indicator being non-zero triggers the alert.
      - alert: AerospikeNamespaceRejectingWrites
        annotations:
          description: 'Namespace {{$labels.ns}} on node {{$labels.instance}} on cluster
            {{$labels.aerospike_cluster}} is currently rejecting all client-originated
            writes.'
          summary: A namespace is currently rejecting all writes. Check for unavailable/dead
            partitions, clock skew, or nodes running out of memory/disk.
        expr: |
          sum without(service) (aerospike_namespace_stop_writes + aerospike_namespace_clock_skew_stop_writes) > 0
        for: 5m
        labels:
          severity: critical
      # Error-rate alerts: errors / (errors + successes), scaled by 100 so the
      # value is a percentage that can actually exceed the threshold of 25 and
      # matches the "percent" wording in the description. Without the scaling
      # the ratio is at most 1 and the alert could never fire.
      # The denominator is clamped to a minimum of 1, presumably to avoid
      # dividing by zero when a namespace is idle.
      - alert: AerospikeHighClientReadErrorRate
        annotations:
          description: '{{ printf "%.0f" $value }} percent of client read transactions
            are resulting in errors for namespace {{$labels.ns}}, on node {{$labels.instance}},
            on cluster {{$labels.aerospike_cluster}}, which is above the threshold of
            25.'
          summary: There is a high rate of errors for client read transactions.
        expr: |
          100 * sum without(service) (rate(aerospike_namespace_client_read_error[5m])) / (clamp_min(sum without(service) (rate(aerospike_namespace_client_read_error[5m])) + sum without(service) (rate(aerospike_namespace_client_read_success[5m])), 1)) > 25
        for: 5m
        labels:
          severity: warning
      # Same percentage calculation as the read alert, for write transactions.
      - alert: AerospikeHighClientWriteErrorRate
        annotations:
          description: '{{ printf "%.0f" $value }} percent of client write transactions
            are resulting in errors for namespace {{$labels.ns}}, on node {{$labels.instance}},
            on cluster {{$labels.aerospike_cluster}}, which is above the threshold of
            25.'
          summary: There is a high rate of errors for client write transactions.
        expr: |
          100 * sum without(service) (rate(aerospike_namespace_client_write_error[5m])) / (clamp_min(sum without(service) (rate(aerospike_namespace_client_write_error[5m])) + sum without(service) (rate(aerospike_namespace_client_write_success[5m])), 1)) > 25
        for: 5m
        labels:
          severity: warning
      # Same percentage calculation, for UDF transactions (error vs. complete).
      - alert: AerospikeHighClientUDFErrorRate
        annotations:
          description: '{{ printf "%.0f" $value }} percent of client UDF transactions
            are resulting in errors for namespace {{$labels.ns}}, on node {{$labels.instance}},
            on cluster {{$labels.aerospike_cluster}}, which is above the threshold of
            25.'
          summary: There is a high rate of errors for client UDF transactions.
        expr: |
          100 * sum without(service) (rate(aerospike_namespace_client_udf_error[5m])) / (clamp_min(sum without(service) (rate(aerospike_namespace_client_udf_error[5m])) + sum without(service) (rate(aerospike_namespace_client_udf_complete[5m])), 1)) > 25
        for: 5m
        labels:
          severity: warning

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,325 @@
{
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": false,
"keepTime": true,
"tags": [
"aerospike-mixin"
],
"targetBlank": false,
"title": "Other Aerospike Dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Logs volume grouped by \"level\" label.",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "bars",
"fillOpacity": 50,
"stacking": {
"mode": "normal"
}
},
"unit": "none"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "purple",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(E|e)(rr.*|RR.*)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(W|w)(arn.*|ARN.*|rn|RN)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(T|t)(race|RACE)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "light-blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "logs"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "text",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 6,
"w": 24
},
"id": 1,
"interval": "30s",
"options": {
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "v10.0.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "sum by (level) (count_over_time({job=~\"integrations/aerospike\",job=~\"$job\",aerospike_cluster=~\"$aerospike_cluster\",instance=~\"$instance\",context=~\"$context\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__interval]))\n",
"legendFormat": "{{ level }}"
}
],
"title": "Logs volume",
"transformations": [
{
"id": "renameByRegex",
"options": {
"regex": "Value",
"renamePattern": "logs"
}
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 18,
"w": 24
},
"id": 2,
"options": {
"dedupStrategy": "exact",
"enableLogDetails": true,
"prettifyLogMessage": true,
"showTime": false,
"wrapLogMessage": true
},
"pluginVersion": "v10.0.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "{job=~\"integrations/aerospike\",job=~\"$job\",aerospike_cluster=~\"$aerospike_cluster\",instance=~\"$instance\",context=~\"$context\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n"
}
],
"title": "Logs",
"type": "logs"
}
],
"refresh": "1m",
"schemaVersion": 36,
"tags": [
"aerospike-mixin"
],
"templating": {
"list": [
{
"label": "Loki data source",
"name": "loki_datasource",
"query": "loki",
"regex": "",
"type": "datasource"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"query": "label_values({job=~\"integrations/aerospike\"}, job)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Aerospike_cluster",
"multi": true,
"name": "aerospike_cluster",
"query": "label_values({job=~\"integrations/aerospike\",job=~\"$job\"}, aerospike_cluster)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Instance",
"multi": true,
"name": "instance",
"query": "label_values({job=~\"integrations/aerospike\",job=~\"$job\",aerospike_cluster=~\"$aerospike_cluster\"}, instance)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Context",
"multi": true,
"name": "context",
"query": "label_values({job=~\"integrations/aerospike\",job=~\"$job\",aerospike_cluster=~\"$aerospike_cluster\",instance=~\"$instance\"}, context)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Level",
"multi": true,
"name": "level",
"query": "label_values({job=~\"integrations/aerospike\",job=~\"$job\",aerospike_cluster=~\"$aerospike_cluster\",instance=~\"$instance\",context=~\"$context\"}, level)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"current": {
"selected": false,
"text": "",
"value": ""
},
"label": "Regex search",
"name": "regex_search",
"options": [
{
"selected": true,
"text": "",
"value": ""
}
],
"query": "",
"type": "textbox"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timezone": "utc",
"title": "Aerospike logs",
"uid": "aerospike-logs"
}

View file

@ -0,0 +1,967 @@
{
"__inputs": [ ],
"__requires": [ ],
"annotations": {
"list": [ ]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"aerospike-mixin"
],
"targetBlank": false,
"title": "Other Aerospike Dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Number of unavailable data partitions in an Aerospike namespace.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
},
"unit": "none"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"displayMode": "gradient",
"minVizHeight": 10,
"minVizWidth": 0,
"orientation": "vertical",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": true,
"text": {
"valueSize": 100
},
"valueMode": "color"
},
"pluginVersion": "10.2.0-59542pre",
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "aerospike_namespace_unavailable_partitions{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}}"
}
],
"title": "Unavailable partitions",
"type": "bargauge"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Disk utilization in an Aerospike namespace.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "continuous-BlYlRd"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "scheme",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 16,
"x": 8,
"y": 0
},
"id": 3,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "100 - aerospike_namespace_device_free_pct{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}}"
}
],
"title": "Disk usage",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Number of dead data partitions in an Aerospike namespace.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
},
"unit": "none"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 8
},
"id": 4,
"options": {
"displayMode": "gradient",
"minVizHeight": 10,
"minVizWidth": 0,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": true,
"text": {
"valueSize": 100
},
"valueMode": "color"
},
"pluginVersion": "10.2.0-59542pre",
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "aerospike_namespace_dead_partitions{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}}"
}
],
"title": "Dead partitions",
"type": "bargauge"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Memory utilization in an Aerospike namespace.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "continuous-BlYlRd"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "scheme",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 16,
"x": 8,
"y": 8
},
"id": 5,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "100 - aerospike_namespace_memory_free_pct{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}}"
}
],
"title": "Memory usage",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Rate of client read transactions in an Aerospike namespace organized by result.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "rps"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 6,
"options": {
"legend": {
"calcs": [
"min",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_read_success{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - success"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_read_error{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - error"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_read_filtered_out{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - filtered"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_read_timeout{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - timeout"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_read_not_found{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - not found"
}
],
"title": "Client reads",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Rate of client write transactions in an Aerospike namespace organized by result.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "wps"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 7,
"options": {
"legend": {
"calcs": [
"min",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_write_success{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - success"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_write_error{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - error"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_write_filtered_out{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - filtered"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_write_timeout{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - timeout"
}
],
"title": "Client writes",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Rate of client UDF transactions in an Aerospike namespace organized by result.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"id": 8,
"options": {
"legend": {
"calcs": [
"min",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_udf_complete{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - complete"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_udf_error{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - error"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_udf_filtered_out{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - filtered"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by(aerospike_cluster, job, ns) (rate(aerospike_namespace_client_udf_timeout{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}} - timeout"
}
],
"title": "Client UDF transactions",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Percentage of read transactions that are resolved by a cache hit in an Aerospike namespace.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"id": 9,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "aerospike_namespace_cache_read_pct{job=~\"$job\", aerospike_cluster=~\"$aerospike_cluster\", ns=~\"$ns\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{aerospike_cluster}} - {{ns}}"
}
],
"title": "Cache read utilization",
"type": "timeseries"
}
],
"refresh": "1m",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"aerospike-mixin"
],
"templating": {
"list": [
{
"current": { },
"hide": 0,
"label": "Data source",
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(aerospike_namespace_ns_cluster_size,job)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".*",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 2,
"includeAll": true,
"label": "Cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(aerospike_namespace_dead_partitions{job=~\"$job\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Aerospike cluster",
"multi": true,
"name": "aerospike_cluster",
"options": [ ],
"query": "label_values(aerospike_namespace_ns_cluster_size{job=~\"$job\"}, aerospike_cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Namespace",
"multi": true,
"name": "ns",
"options": [ ],
"query": "label_values(aerospike_namespace_xmem_id{job=~\"$job\"}, ns)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "default",
"title": "Aerospike namespace overview",
"uid": "aerospike-namespace-overview",
"version": 0
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,51 @@
groups:
- name: apache-activemq-alerts
rules:
- alert: ApacheActiveMQHighTopicMemoryUsage
annotations:
description: '{{ printf "%.0f" $value }} percent of memory used by topics on
{{$labels.instance}} in cluster {{$labels.activemq_cluster}}, which is above
the threshold of 70 percent.'
summary: Topic destination memory usage is high, which may result in a reduction
of the rate at which producers send messages.
expr: |
sum without (destination) (activemq_topic_memory_percent_usage{destination!~"ActiveMQ.Advisory.*"}) > 70
for: 5m
labels:
severity: warning
- alert: ApacheActiveMQHighQueueMemoryUsage
annotations:
description: '{{ printf "%.0f" $value }} percent of memory used by queues on
{{$labels.instance}} in cluster {{$labels.activemq_cluster}}, which is above
the threshold of 70 percent.'
summary: Queue destination memory usage is high, which may result in a reduction
of the rate at which producers send messages.
expr: |
sum without (destination) (activemq_queue_memory_percent_usage) > 70
for: 5m
labels:
severity: warning
- alert: ApacheActiveMQHighStoreMemoryUsage
annotations:
description: '{{ printf "%.0f" $value }} percent of store memory used on {{$labels.instance}}
in cluster {{$labels.activemq_cluster}}, which is above the threshold of 70
percent.'
summary: Store memory usage is high, which may result in producers unable to
send messages.
expr: |
activemq_store_usage_ratio > 70
for: 5m
labels:
severity: warning
- alert: ApacheActiveMQHighTemporaryMemoryUsage
annotations:
description: '{{ printf "%.0f" $value }} percent of temporary memory used on
{{$labels.instance}} in cluster {{$labels.activemq_cluster}}, which is above
the threshold of 70 percent.'
summary: Temporary memory usage is high, which may result in saturation of messaging
throughput.
expr: |
activemq_temp_usage_ratio > 70
for: 5m
labels:
severity: warning

View file

@ -0,0 +1,786 @@
{
"__inputs": [ ],
"__requires": [ ],
"annotations": {
"list": [ ]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"apache-activemq-mixin"
],
"targetBlank": false,
"title": "Other Apache ActiveMQ dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Number of clusters that are reporting metrics from ActiveMQ.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "fixed"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": [ ]
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"colorMode": "none",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.2.0-60139",
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "count (activemq_memory_usage_ratio{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "__auto"
}
],
"title": "Clusters",
"type": "stat"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Number of broker instances across clusters.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "fixed"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": [ ]
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 0
},
"id": 3,
"options": {
"colorMode": "none",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.2.0-60139",
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "count (activemq_memory_usage_ratio{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "__auto"
}
],
"title": "Brokers",
"type": "stat"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Number of message producers active on destinations across clusters.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "fixed"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": [ ]
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 0
},
"id": 4,
"options": {
"colorMode": "none",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.2.0-60139",
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum (activemq_queue_producer_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"}) + sum (activemq_topic_producer_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\",destination!~\"ActiveMQ.Advisory.*\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "__auto"
}
],
"title": "Producers",
"type": "stat"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The number of consumers subscribed to destinations across clusters.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "fixed"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": [ ]
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 0
},
"id": 5,
"options": {
"colorMode": "none",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.2.0-60139",
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum (activemq_queue_consumer_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"}) + sum (activemq_topic_consumer_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\",destination!~\"ActiveMQ.Advisory.*\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "__auto"
}
],
"title": "Consumers",
"type": "stat"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Number of messages that have been sent to destinations in a cluster.",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "#C8F2C2",
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisShow": false,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 25,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 6
},
"id": 6,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by (activemq_cluster, job) (increase(activemq_queue_enqueue_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"}[$__interval:])) + sum by (activemq_cluster, job) (increase(activemq_topic_enqueue_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\", destination!~\"ActiveMQ.Advisory.*\"}[$__interval:]))",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{activemq_cluster}}"
}
],
"title": "Enqueue / $__interval",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Number of messages that have been acknowledged (and removed) from destinations in a cluster.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisShow": false,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 25,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 6
},
"id": 7,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum by (activemq_cluster, job) (increase(activemq_queue_dequeue_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"}[$__interval:])) + sum by (activemq_cluster, job) (increase(activemq_topic_dequeue_count{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\", destination!~\"ActiveMQ.Advisory.*\"}[$__interval:]))",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{activemq_cluster}}"
}
],
"title": "Dequeue / $__interval",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Average percentage of temporary memory used across clusters.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 50
},
{
"color": "red",
"value": 70
}
]
},
"unit": "percentunit"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 8,
"x": 0,
"y": 14
},
"id": 8,
"options": {
"displayMode": "gradient",
"minVizHeight": 10,
"minVizWidth": 0,
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": false,
"text": { },
"valueMode": "color"
},
"pluginVersion": "10.2.0-60139",
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "avg by (activemq_cluster, job) (activemq_temp_usage_ratio{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{activemq_cluster}}"
}
],
"title": "Average temporary memory usage",
"type": "bargauge"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Average percentage of store memory used across clusters.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 50
},
{
"color": "red",
"value": 70
}
]
},
"unit": "percentunit"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 8,
"x": 8,
"y": 14
},
"id": 9,
"options": {
"displayMode": "gradient",
"minVizHeight": 10,
"minVizWidth": 0,
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": false,
"valueMode": "color"
},
"pluginVersion": "10.2.0-60139",
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "avg by (activemq_cluster, job) (activemq_store_usage_ratio{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{activemq_cluster}}"
}
],
"title": "Average store memory usage",
"type": "bargauge"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Average percentage of broker memory used across clusters.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [ ],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 50
},
{
"color": "red",
"value": 70
}
]
},
"unit": "percentunit"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 8,
"x": 16,
"y": 14
},
"id": 10,
"options": {
"displayMode": "gradient",
"minVizHeight": 10,
"minVizWidth": 0,
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": false,
"valueMode": "color"
},
"pluginVersion": "10.2.0-60139",
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "avg by (activemq_cluster, job) (activemq_memory_usage_ratio{job=~\"$job\", activemq_cluster=~\"$activemq_cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{activemq_cluster}}"
}
],
"title": "Average broker memory usage",
"type": "bargauge"
}
],
"refresh": "1m",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"apache-activemq-mixin"
],
"templating": {
"list": [
{
"current": { },
"hide": 0,
"label": "Data source",
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(activemq_topic_producer_count,job)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".*",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 2,
"includeAll": true,
"label": "Cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(activemq_memory_usage_ratio{job=~\"$job\", cluster=~\"$cluster\"},cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "ActiveMQ cluster",
"multi": true,
"name": "activemq_cluster",
"options": [ ],
"query": "label_values(activemq_memory_usage_ratio{job=~\"$job\"},activemq_cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "default",
"title": "Apache ActiveMQ cluster overview",
"uid": "apache-activemq-cluster-overview",
"version": 0
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,310 @@
{
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"apache-activemq-mixin"
],
"targetBlank": false,
"title": "Other Apache ActiveMQ dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Logs volume grouped by \"level\" label.",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "bars",
"fillOpacity": 50,
"stacking": {
"mode": "normal"
}
},
"unit": "none"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "purple",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(E|e)(rr.*|RR.*)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(W|w)(arn.*|ARN.*|rn|RN)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(T|t)(race|RACE)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "light-blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "logs"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "text",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 6,
"w": 24
},
"id": 1,
"interval": "30s",
"options": {
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "v10.0.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "sum by (level) (count_over_time({job=~\"integrations/apache-activemq\",job=~\"$job\",activemq_cluster=~\"$activemq_cluster\",instance=~\"$instance\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__interval]))\n",
"legendFormat": "{{ level }}"
}
],
"title": "Logs volume",
"transformations": [
{
"id": "renameByRegex",
"options": {
"regex": "Value",
"renamePattern": "logs"
}
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 18,
"w": 24
},
"id": 2,
"options": {
"dedupStrategy": "exact",
"enableLogDetails": true,
"prettifyLogMessage": true,
"showTime": false,
"wrapLogMessage": true
},
"pluginVersion": "v10.0.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "{job=~\"integrations/apache-activemq\",job=~\"$job\",activemq_cluster=~\"$activemq_cluster\",instance=~\"$instance\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n"
}
],
"title": "Logs",
"type": "logs"
}
],
"refresh": "1m",
"schemaVersion": 36,
"tags": [
"apache-activemq-mixin"
],
"templating": {
"list": [
{
"label": "Loki data source",
"name": "loki_datasource",
"query": "loki",
"regex": "",
"type": "datasource"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"query": "label_values({job=~\"integrations/apache-activemq\"}, job)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "ActiveMQ cluster",
"multi": true,
"name": "activemq_cluster",
"query": "label_values({job=~\"integrations/apache-activemq\",job=~\"$job\"}, activemq_cluster)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Instance",
"multi": true,
"name": "instance",
"query": "label_values({job=~\"integrations/apache-activemq\",job=~\"$job\",activemq_cluster=~\"$activemq_cluster\"}, instance)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Level",
"multi": true,
"name": "level",
"query": "label_values({job=~\"integrations/apache-activemq\",job=~\"$job\",activemq_cluster=~\"$activemq_cluster\",instance=~\"$instance\"}, level)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"current": {
"selected": false,
"text": "",
"value": ""
},
"label": "Regex search",
"name": "regex_search",
"options": [
{
"selected": true,
"text": "",
"value": ""
}
],
"query": "",
"type": "textbox"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timezone": "utc",
"title": "Apache ActiveMQ logs",
"uid": "apache-activemq-logs"
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,45 @@
groups:
- name: apache-airflow
rules:
- alert: ApacheAirflowStarvingPoolTasks
annotations:
description: |
The number of starved tasks is {{ printf "%.0f" $value }} over the last 5m on {{ $labels.instance }} - {{ $labels.pool_name }} which is above the threshold of 0.
summary: There are starved tasks detected in the Apache Airflow pool.
expr: |
airflow_pool_starving_tasks > 0
for: 5m
labels:
severity: critical
- alert: ApacheAirflowDAGScheduleDelayWarningLevel
annotations:
description: |
The average delay in DAG schedule to run time is {{ printf "%.0f" $value }} over the last 1m on {{ $labels.instance }} - {{ $labels.dag_id }} which is above the threshold of 10.
summary: The delay in DAG schedule time to DAG run time has reached the warning
threshold.
expr: |
increase(airflow_dagrun_schedule_delay_sum[5m]) / clamp_min(increase(airflow_dagrun_schedule_delay_count[5m]),1) > 10
for: 1m
labels:
severity: warning
- alert: ApacheAirflowDAGScheduleDelayCriticalLevel
annotations:
description: |
The average delay in DAG schedule to run time is {{ printf "%.0f" $value }} over the last 1m for {{ $labels.instance }} - {{ $labels.dag_id }} which is above the threshold of 60.
summary: The delay in DAG schedule time to DAG run time has reached the critical
threshold.
expr: |
increase(airflow_dagrun_schedule_delay_sum[5m]) / clamp_min(increase(airflow_dagrun_schedule_delay_count[5m]),1) > 60
for: 1m
labels:
severity: critical
- alert: ApacheAirflowDAGFailures
annotations:
description: |
The number of DAG failures seen is {{ printf "%.0f" $value }} over the last 1m for {{ $labels.instance }} - {{ $labels.dag_id }} which is above the threshold of 0.
summary: There have been DAG failures detected.
expr: |
increase(airflow_dagrun_duration_failed_count[5m]) > 0
for: 1m
labels:
severity: critical

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1 @@
null

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,91 @@
groups:
- name: ApacheCassandraAlerts
rules:
- alert: HighReadLatency
annotations:
description: 'An average of {{ printf "%.0f" $value }}ms of read latency has
occurred over the last 5 minutes on {{$labels.instance}}, which is above the
threshold of 200ms. '
summary: There is a high level of read latency within the node.
expr: |
sum(cassandra_table_readlatency_seconds_sum) by (instance) / sum(cassandra_table_readlatency_seconds_count) by (instance) * 1000 > 200
for: 5m
labels:
severity: critical
- alert: HighWriteLatency
annotations:
description: 'An average of {{ printf "%.0f" $value }}ms of write latency has
occurred over the last 5 minutes on {{$labels.instance}}, which is above the
threshold of 200ms. '
summary: There is a high level of write latency within the node.
expr: |
sum(cassandra_keyspace_writelatency_seconds_sum) by (instance) / sum(cassandra_keyspace_writelatency_seconds_count) by (instance) * 1000 > 200
for: 5m
labels:
severity: critical
- alert: HighPendingCompactionTasks
annotations:
description: '{{ printf "%.0f" $value }} compaction tasks have been pending
over the last 15 minutes on {{$labels.instance}}, which is above the threshold
of 30. '
summary: Compaction task queue is filling up.
expr: |
cassandra_compaction_pendingtasks > 30
for: 15m
labels:
severity: warning
- alert: BlockedCompactionTasksFound
annotations:
description: '{{ printf "%.0f" $value }} compaction tasks have been blocked
over the last 5 minutes on {{$labels.instance}}, which is above the threshold
of 1. '
summary: Compaction task queue is full.
expr: |
cassandra_threadpools_currentlyblockedtasks_count{threadpools="CompactionExecutor", path="internal"} > 1
for: 5m
labels:
severity: critical
- alert: HintsStoredOnNode
annotations:
description: '{{ printf "%.0f" $value }} hints have been written to the node
over the last minute on {{$labels.instance}}, which is above the threshold
of 1. '
summary: Hints have been recently written to this node.
expr: |
increase(cassandra_storage_totalhints_count[5m]) > 1
for: 1m
labels:
severity: warning
- alert: UnavailableWriteRequestsFound
annotations:
description: '{{ printf "%.0f" $value }} unavailable write requests have been
found over the last 5 minutes on {{$labels.instance}}, which is above the
threshold of 1. '
summary: Unavailable exceptions have been encountered while performing writes
in this cluster.
expr: |
sum(cassandra_clientrequest_unavailables_count{clientrequest="Write"}) by (cassandra_cluster) > 1
for: 5m
labels:
severity: critical
- alert: HighCpuUsage
annotations:
description: 'CPU usage is at {{ printf "%.0f" $value }} percent over the last
5 minutes on {{$labels.instance}}, which is above the threshold of 80. '
summary: A node has a CPU usage higher than the configured threshold.
expr: |
jvm_process_cpu_load{job=~"integrations/apache-cassandra"} * 100 > 80
for: 5m
labels:
severity: critical
- alert: HighMemoryUsage
annotations:
description: 'Memory usage is at {{ printf "%.0f" $value }} percent over the
last 5 minutes on {{$labels.instance}}, which is above the threshold of 80. '
summary: A node has a higher memory utilization than the configured threshold.
expr: |
sum(jvm_memory_usage_used_bytes{job=~"integrations/apache-cassandra", area="Heap"}) / sum(jvm_physical_memory_size{job=~"integrations/apache-cassandra"}) * 100 > 80
for: 5m
labels:
severity: critical

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,114 @@
groups:
- name: ApacheCouchDBAlerts
rules:
# CouchDBUnhealthyCluster: critical alert raised when the minimum of
# couchdb_couch_replicator_cluster_is_stable per job/couchdb_cluster stays
# below 1 for 5 minutes.
- alert: CouchDBUnhealthyCluster
annotations:
description: '{{$labels.couchdb_cluster}} has reported a value of {{ printf
"%.0f" $value }} for its stability over the last 5 minutes, which is below
the threshold of 1.'
summary: At least one of the nodes in a cluster is reporting the cluster as
being unstable.
expr: |
min by(job, couchdb_cluster) (couchdb_couch_replicator_cluster_is_stable) < 1
for: 5m
labels:
severity: critical
# CouchDBHigh4xxResponseCodes: warning alert raised when the 5-minute increase
# of couchdb_httpd_status_codes with a code matching "4.*", summed per
# job/instance, stays above 5 for 5 minutes.
- alert: CouchDBHigh4xxResponseCodes
annotations:
description: '{{ printf "%.0f" $value }} 4xx responses have been detected over
the last 5 minutes on {{$labels.instance}}, which is above the threshold of
5.'
summary: There are a high number of 4xx responses for incoming requests to a
node.
expr: |
sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"4.*"}[5m])) > 5
for: 5m
labels:
severity: warning
# CouchDBHigh5xxResponseCodes: critical alert raised when the 5-minute increase
# of couchdb_httpd_status_codes with a code matching "5.*", summed per
# job/instance, stays above 0 for 5 minutes.
- alert: CouchDBHigh5xxResponseCodes
annotations:
description: '{{ printf "%.0f" $value }} 5xx responses have been detected over
the last 5 minutes on {{$labels.instance}}, which is above the threshold of
0.'
summary: There are a high number of 5xx responses for incoming requests to a
node.
expr: |
sum by(job, instance) (increase(couchdb_httpd_status_codes{code=~"5.*"}[5m])) > 0
for: 5m
labels:
severity: critical
# CouchDBModerateRequestLatency: warning alert raised when the mean request
# time (sum / count of couchdb_request_time_seconds), summed per job/instance,
# stays above 500ms for 5 minutes.
# The ratio is multiplied by 1000 because the metric name declares seconds
# while both the threshold and the rendered $value are in milliseconds.
# NOTE(review): assumes the exporter honours the _seconds suffix - confirm.
- alert: CouchDBModerateRequestLatency
annotations:
description: 'An average of {{ printf "%.0f" $value }}ms of request latency
has occurred over the last 5 minutes on {{$labels.instance}}, which is above
the threshold of 500ms. '
summary: There is a moderate level of request latency for a node.
expr: |
sum by(job, instance) (1000 * couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > 500
for: 5m
labels:
severity: warning
# CouchDBHighRequestLatency: critical alert raised when the mean request time
# (sum / count of couchdb_request_time_seconds), summed per job/instance,
# stays above 1000ms for 5 minutes.
# The ratio is multiplied by 1000 because the metric name declares seconds
# while both the threshold and the rendered $value are in milliseconds.
# NOTE(review): assumes the exporter honours the _seconds suffix - confirm.
- alert: CouchDBHighRequestLatency
annotations:
description: 'An average of {{ printf "%.0f" $value }}ms of request latency
has occurred over the last 5 minutes on {{$labels.instance}}, which is above
the threshold of 1000ms. '
summary: There is a high level of request latency for a node.
expr: |
sum by(job, instance) (1000 * couchdb_request_time_seconds_sum / couchdb_request_time_seconds_count) > 1000
for: 5m
labels:
severity: critical
# CouchDBManyReplicatorJobsPending: warning alert raised when
# couchdb_couch_replicator_jobs_pending, summed per job/instance, stays above
# 10 for 5 minutes.
- alert: CouchDBManyReplicatorJobsPending
annotations:
description: '{{ printf "%.0f" $value }} replicator jobs are pending on {{$labels.instance}},
which is above the threshold of 10. '
summary: There is a high number of replicator jobs pending for a node.
expr: |
sum by(job, instance) (couchdb_couch_replicator_jobs_pending) > 10
for: 5m
labels:
severity: warning
# CouchDBReplicatorJobsCrashing: critical alert raised when the 5-minute
# increase of couchdb_couch_replicator_jobs_crashes_total, summed per
# job/instance, stays above 0 for 5 minutes.
- alert: CouchDBReplicatorJobsCrashing
annotations:
description: '{{ printf "%.0f" $value }} replicator jobs have crashed over the
last 5 minutes on {{$labels.instance}}, which is above the threshold of 0. '
summary: There are replicator jobs crashing for a node.
expr: |
sum by(job, instance) (increase(couchdb_couch_replicator_jobs_crashes_total[5m])) > 0
for: 5m
labels:
severity: critical
# CouchDBReplicatorChangesQueuesDying: warning alert raised when the 5-minute
# increase of couchdb_couch_replicator_changes_queue_deaths_total, summed per
# job/instance, stays above 0 for 5 minutes.
- alert: CouchDBReplicatorChangesQueuesDying
annotations:
description: '{{ printf "%.0f" $value }} replicator changes queue processes
have died over the last 5 minutes on {{$labels.instance}}, which is above
the threshold of 0. '
summary: There are replicator changes queue process deaths for a node.
expr: |
sum by(job, instance) (increase(couchdb_couch_replicator_changes_queue_deaths_total[5m])) > 0
for: 5m
labels:
severity: warning
# CouchDBReplicatorConnectionOwnersCrashing: warning alert raised when the
# 5-minute increase of couchdb_couch_replicator_connection_owner_crashes_total,
# summed per job/instance, stays above 0 for 5 minutes.
- alert: CouchDBReplicatorConnectionOwnersCrashing
annotations:
description: '{{ printf "%.0f" $value }} replicator connection owner processes
have crashed over the last 5 minutes on {{$labels.instance}}, which is above
the threshold of 0. '
summary: There are replicator connection owner process crashes for a node.
expr: |
sum by(job, instance) (increase(couchdb_couch_replicator_connection_owner_crashes_total[5m])) > 0
for: 5m
labels:
severity: warning
# CouchDBReplicatorConnectionWorkersCrashing: warning alert raised when the
# 5-minute increase of couchdb_couch_replicator_connection_worker_crashes_total,
# summed per job/instance, stays above 0 for 5 minutes.
- alert: CouchDBReplicatorConnectionWorkersCrashing
annotations:
description: '{{ printf "%.0f" $value }} replicator connection worker processes
have crashed over the last 5 minutes on {{$labels.instance}}, which is above
the threshold of 0. '
summary: There are replicator connection worker process crashes for a node.
expr: |
sum by(job, instance) (increase(couchdb_couch_replicator_connection_worker_crashes_total[5m])) > 0
for: 5m
labels:
severity: warning

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,90 @@
groups:
- name: apache-hadoop
rules:
# ApacheHadoopLowHDFSCapacity: warning alert raised when remaining capacity as
# a percentage of total capacity (minimum across the job and name labels)
# stays below 20 for 5 minutes. clamp_min(..., 1) keeps the divisor from
# being zero.
- alert: ApacheHadoopLowHDFSCapacity
annotations:
description: '{{ printf "%.0f" $value }} percent remaining HDFS usage on {{$labels.hadoop_cluster}}
- {{$labels.instance}}, which is below the threshold of 20.'
summary: Remaining HDFS cluster capacity is low which may result in DataNode
failures or prevent DataNodes from writing data.
expr: |
min without(job, name) (100 * hadoop_namenode_capacityremaining / clamp_min(hadoop_namenode_capacitytotal, 1)) < 20
for: 5m
labels:
severity: warning
# ApacheHadoopHDFSMissingBlocks: critical alert raised when
# hadoop_namenode_missingblocks (maximum across the job and name labels) stays
# above 0 for 5 minutes.
- alert: ApacheHadoopHDFSMissingBlocks
annotations:
description: '{{ printf "%.0f" $value }} HDFS missing blocks on {{$labels.hadoop_cluster}}
- {{$labels.instance}}, which is above the threshold of 0.'
summary: There are missing blocks in the HDFS cluster which may indicate potential
data loss.
expr: |
max without(job, name) (hadoop_namenode_missingblocks) > 0
for: 5m
labels:
severity: critical
# ApacheHadoopHDFSHighVolumeFailures: critical alert raised when
# hadoop_namenode_volumefailurestotal (maximum across the job and name labels)
# stays above 0 for 5 minutes.
- alert: ApacheHadoopHDFSHighVolumeFailures
annotations:
description: '{{ printf "%.0f" $value }} HDFS volume failures on {{$labels.hadoop_cluster}}
- {{$labels.instance}}, which is above the threshold of 0.'
summary: A volume failure in HDFS cluster may indicate hardware failures.
expr: |
max without(job, name) (hadoop_namenode_volumefailurestotal) > 0
for: 5m
labels:
severity: critical
# ApacheHadoopHighDeadDataNodes: critical alert raised when
# hadoop_namenode_numdeaddatanodes (maximum across the job and name labels)
# stays above 0 for 5 minutes.
# The description names dead DataNodes, matching the metric being evaluated.
- alert: ApacheHadoopHighDeadDataNodes
annotations:
description: '{{ printf "%.0f" $value }} dead DataNodes on {{$labels.hadoop_cluster}}
- {{$labels.instance}}, which is above the threshold of 0.'
summary: Number of dead DataNodes has increased, which could result in data
loss and increased network activity.
expr: |
max without(job, name) (hadoop_namenode_numdeaddatanodes) > 0
for: 5m
labels:
severity: critical
# ApacheHadoopHighNodeManagerCPUUsage: critical alert raised when
# hadoop_nodemanager_nodecpuutilization scaled by 100 (maximum across the job
# and name labels) stays above 80 for 5 minutes.
- alert: ApacheHadoopHighNodeManagerCPUUsage
annotations:
description: '{{ printf "%.0f" $value }} CPU usage on {{$labels.hadoop_cluster}}
- {{$labels.instance}}, which is above the threshold of 80.'
summary: A NodeManager has a CPU usage higher than the configured threshold.
expr: |
max without(job, name) (100 * hadoop_nodemanager_nodecpuutilization) > 80
for: 5m
labels:
severity: critical
# ApacheHadoopHighNodeManagerMemoryUsage: critical alert raised when allocated
# memory as a percentage of (available + allocated) memory, maximum across the
# job and name labels, stays above 80 for 5 minutes. clamp_min(..., 1) keeps
# the divisor from being zero.
- alert: ApacheHadoopHighNodeManagerMemoryUsage
annotations:
description: '{{ printf "%.0f" $value}} percent NodeManager memory usage on
{{$labels.hadoop_cluster}} - {{$labels.instance}}, which is above the threshold
of 80.'
summary: A NodeManager has a higher memory utilization than the configured threshold.
expr: |
max without(job, name) (100 * hadoop_nodemanager_allocatedgb / clamp_min(hadoop_nodemanager_availablegb + hadoop_nodemanager_allocatedgb,1)) > 80
for: 5m
labels:
severity: critical
# ApacheHadoopHighResourceManagerVirtualCoreCPUUsage: critical alert raised
# when allocated vcores as a percentage of (available + allocated) vcores,
# maximum across the job and name labels, stays above 80 for 5 minutes.
# clamp_min(..., 1) keeps the divisor from being zero.
- alert: ApacheHadoopHighResourceManagerVirtualCoreCPUUsage
annotations:
description: '{{ printf "%.0f" $value }} virtual core CPU usage on {{$labels.hadoop_cluster}}
- {{$labels.instance}}, which is above the threshold of 80.'
summary: A ResourceManager has a virtual core CPU usage higher than the configured
threshold.
expr: |
max without(job, name) (100 * hadoop_resourcemanager_allocatedvcores / clamp_min(hadoop_resourcemanager_availablevcores + hadoop_resourcemanager_allocatedvcores,1)) > 80
for: 5m
labels:
severity: critical
# ApacheHadoopHighResourceManagerMemoryUsage: critical alert raised when
# allocated memory as a percentage of (available + allocated) memory, maximum
# across the job and name labels, stays above 80 for 5 minutes.
# clamp_min(..., 1) keeps the divisor from being zero.
- alert: ApacheHadoopHighResourceManagerMemoryUsage
annotations:
description: '{{ printf "%.0f" $value}} percent ResourceManager memory usage
on {{$labels.hadoop_cluster}} - {{$labels.instance}}, which is above the threshold
of 80.'
summary: A ResourceManager has a higher memory utilization than the configured
threshold.
expr: |
max without(job, name) (100 * hadoop_resourcemanager_allocatedmb / clamp_min(hadoop_resourcemanager_availablemb + hadoop_resourcemanager_allocatedmb,1)) > 80
for: 5m
labels:
severity: critical

View file

@ -0,0 +1,488 @@
{
"__inputs": [ ],
"__requires": [ ],
"annotations": {
"list": [ ]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"apache-hadoop-mixin"
],
"targetBlank": false,
"title": "Other Apache Hadoop dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"collapsed": false,
"datasource": {
"uid": "${prometheus_datasource}"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": ""
}
],
"title": "DataNodes",
"type": "row"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Total number of blocks evicted without being read by the Hadoop DataNode.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": ""
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 8,
"x": 0,
"y": 1
},
"id": 3,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(hadoop_datanode_ramdiskblocksevictedwithoutread{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\"}[$__interval:])",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Unread blocks evicted",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Total number of blocks removed by the Hadoop DataNode.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": ""
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 8,
"x": 8,
"y": 1
},
"id": 4,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(hadoop_datanode_blocksremoved{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\"}[$__interval:])",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Blocks removed",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Displays the total number of volume failures encountered by the Hadoop DataNode.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": ""
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 8,
"x": 16,
"y": 1
},
"id": 5,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(hadoop_datanode_volumefailures{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\"}[$__interval:])",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Volume failures",
"type": "timeseries"
},
{
"datasource": {
"uid": "${loki_datasource}"
},
"description": "The DataNode logs.",
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 7
},
"id": 6,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"uid": "${loki_datasource}"
},
"editorMode": "code",
"expr": "{job=~\"$job\", hadoop_cluster=~\"$hadoop_cluster\", instance=~\"$instance\", filename=~\".*/hadoop/logs/.*-datanode.*.log\"} |= ``",
"queryType": "range",
"refId": "A"
}
],
"title": "DataNode logs",
"type": "logs"
}
],
"refresh": "1m",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"apache-hadoop-mixin"
],
"templating": {
"list": [
{
"current": { },
"hide": 0,
"label": "Data Source",
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"current": { },
"hide": 0,
"label": "Loki Datasource",
"name": "loki_datasource",
"options": [ ],
"query": "loki",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(hadoop_datanode_ramdiskblocksevictedwithoutread,job)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Instance",
"multi": true,
"name": "instance",
"options": [ ],
"query": "label_values(hadoop_datanode_ramdiskblocksevictedwithoutread{job=~\"$job\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Hadoop cluster",
"multi": true,
"name": "hadoop_cluster",
"options": [ ],
"query": "label_values(hadoop_datanode_ramdiskblocksevictedwithoutread{job=~\"$job\"}, hadoop_cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "default",
"title": "Apache Hadoop DataNode overview",
"uid": "apache-hadoop-datanode-overview",
"version": 0
}

View file

@ -0,0 +1,983 @@
{
"__inputs": [ ],
"__requires": [ ],
"annotations": {
"list": [ ]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"apache-hadoop-mixin"
],
"targetBlank": false,
"title": "Other Apache Hadoop dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The DataNodes current state.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [ ]
},
"overrides": [ ]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "hadoop_namenode_numlivedatanodes{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}} - live DataNodes"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "hadoop_namenode_numdeaddatanodes{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}} - dead DataNodes"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "hadoop_namenode_numstaledatanodes{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}} - stale DataNodes"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "hadoop_namenode_numdecommissioningdatanodes{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}} - decommissioning DataNodes"
}
],
"title": "DataNode state",
"type": "piechart"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The storage utilization of the NameNode.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "percent"
},
"overrides": [ ]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 0
},
"id": 3,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "100 * hadoop_namenode_capacityused{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"} / clamp_min(hadoop_namenode_capacitytotal{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}, 1)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Capacity utilization",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Total number of blocks managed by the NameNode.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": ""
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 8,
"x": 0,
"y": 9
},
"id": 4,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "hadoop_namenode_blockstotal{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Total blocks",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Number of blocks reported by DataNodes as missing.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": ""
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 8,
"x": 8,
"y": 9
},
"id": 5,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "hadoop_namenode_missingblocks{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Missing blocks",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Number of blocks that are under-replicated.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": ""
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 8,
"x": 16,
"y": 9
},
"id": 6,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "hadoop_namenode_underreplicatedblocks{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Under-replicated blocks",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Number of transactions processed by the NameNode since the last checkpoint.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": ""
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 15
},
"id": 7,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "hadoop_namenode_transactionssincelastcheckpoint{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Transactions since last checkpoint",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The recent increase in number of volume failures on all DataNodes.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": ""
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 15
},
"id": 8,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(hadoop_namenode_volumefailurestotal{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}[$__interval:])",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Volume failures",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Total number of files managed by the NameNode.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": ""
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 21
},
"id": 9,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "hadoop_namenode_filestotal{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Total files",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Total load on the NameNode.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": ""
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 21
},
"id": 10,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "hadoop_namenode_totalload{job=~\"$job\", instance=~\"$instance\", hadoop_cluster=~\"$hadoop_cluster\", name=\"FSNamesystem\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{hadoop_cluster}} - {{instance}}"
}
],
"title": "Total load",
"type": "timeseries"
},
{
"datasource": {
"uid": "${loki_datasource}"
},
"description": "The NameNode logs.",
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 27
},
"id": 11,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"uid": "${loki_datasource}"
},
"editorMode": "code",
"expr": "{job=~\"$job\", hadoop_cluster=~\"$hadoop_cluster\", instance=~\"$instance\", filename=~\".*/hadoop/logs/.*-namenode.*.log\"} |= ``",
"queryType": "range",
"refId": "A"
}
],
"title": "NameNode logs",
"type": "logs"
}
],
"refresh": "1m",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"apache-hadoop-mixin"
],
"templating": {
"list": [
{
"current": { },
"hide": 0,
"label": "Data Source",
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"current": { },
"hide": 0,
"label": "Loki Datasource",
"name": "loki_datasource",
"options": [ ],
"query": "loki",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(hadoop_namenode_blockstotal,job)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Instance",
"multi": true,
"name": "instance",
"options": [ ],
"query": "label_values(hadoop_namenode_blockstotal{job=~\"$job\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Hadoop cluster",
"multi": true,
"name": "hadoop_cluster",
"options": [ ],
"query": "label_values(hadoop_namenode_blockstotal{job=~\"$job\"}, hadoop_cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "default",
"title": "Apache Hadoop NameNode overview",
"uid": "apache-hadoop-namenode-overview",
"version": 0
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,59 @@
groups:
- name: apache-hbase-alerts
rules:
- alert: HBaseHighHeapMemUsage
annotations:
description: The heap memory usage for the JVM on instance {{$labels.instance}}
in cluster {{$labels.hbase_cluster}} is {{printf "%.0f" $value}} percent,
which is above the threshold of 80 percent
summary: There is a limited amount of heap memory available to the JVM.
expr: |
100 * sum without(context, hostname, processname) (jvm_metrics_mem_heap_used_m{job=~"integrations/apache-hbase"} / clamp_min(jvm_metrics_mem_heap_committed_m{job=~"integrations/apache-hbase"}, 1)) > 80
for: 5m
labels:
severity: warning
- alert: HBaseDeadRegionServer
annotations:
description: '{{$value}} RegionServer(s) in cluster {{$labels.hbase_cluster}}
are unresponsive, which is above the threshold of 0. The name(s) of the dead
RegionServer(s) are {{$labels.deadregionservers}}'
summary: One or more RegionServer(s) has become unresponsive.
expr: |
server_num_dead_region_servers > 0
for: 5m
labels:
severity: warning
- alert: HBaseOldRegionsInTransition
annotations:
description: '{{printf "%.0f" $value}} percent of regions in transition
in cluster {{$labels.hbase_cluster}} have been transitioning for longer than expected,
which is above the threshold of 50 percent'
summary: Regions are in transition for longer than expected.
expr: |
100 * assignment_manager_rit_count_over_threshold / clamp_min(assignment_manager_rit_count, 1) > 50
for: 5m
labels:
severity: warning
- alert: HBaseHighMasterAuthFailRate
annotations:
description: '{{printf "%.0f" $value}} percent of authentication attempts to
the master are failing in cluster {{$labels.hbase_cluster}}, which is above
the threshold of 35 percent'
summary: A high percentage of authentication attempts to the master are failing.
expr: |
100 * rate(master_authentication_failures[5m]) / (clamp_min(rate(master_authentication_successes[5m]), 1) + clamp_min(rate(master_authentication_failures[5m]), 1)) > 35
for: 5m
labels:
severity: warning
- alert: HBaseHighRSAuthFailRate
annotations:
description: '{{printf "%.0f" $value}} percent of authentication attempts to
the RegionServer {{$labels.instance}} are failing in cluster {{$labels.hbase_cluster}},
which is above the threshold of 35 percent'
summary: A high percentage of authentication attempts to a RegionServer are
failing.
expr: |
100 * rate(region_server_authentication_failures[5m]) / (clamp_min(rate(region_server_authentication_successes[5m]), 1) + clamp_min(rate(region_server_authentication_failures[5m]), 1)) > 35
for: 5m
labels:
severity: warning

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,325 @@
{
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": false,
"keepTime": true,
"tags": [
"apache-hbase-mixin"
],
"targetBlank": false,
"title": "Other Apache HBase Dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Logs volume grouped by \"level\" label.",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "bars",
"fillOpacity": 50,
"stacking": {
"mode": "normal"
}
},
"unit": "none"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "purple",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(E|e)(rr.*|RR.*)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(W|w)(arn.*|ARN.*|rn|RN)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(T|t)(race|RACE)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "light-blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "logs"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "text",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 6,
"w": 24
},
"id": 1,
"interval": "30s",
"options": {
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "v10.0.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "sum by (level) (count_over_time({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",logger=~\"$logger\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__interval]))\n",
"legendFormat": "{{ level }}"
}
],
"title": "Logs volume",
"transformations": [
{
"id": "renameByRegex",
"options": {
"regex": "Value",
"renamePattern": "logs"
}
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 18,
"w": 24
},
"id": 2,
"options": {
"dedupStrategy": "exact",
"enableLogDetails": true,
"prettifyLogMessage": true,
"showTime": false,
"wrapLogMessage": true
},
"pluginVersion": "v10.0.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "{job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",logger=~\"$logger\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n"
}
],
"title": "Logs",
"type": "logs"
}
],
"refresh": "1m",
"schemaVersion": 36,
"tags": [
"apache-hbase-mixin"
],
"templating": {
"list": [
{
"label": "Loki data source",
"name": "loki_datasource",
"query": "loki",
"regex": "",
"type": "datasource"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"query": "label_values({job=~\"integrations/apache-hbase\"}, job)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "HBase cluster",
"multi": true,
"name": "hbase_cluster",
"query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\"}, hbase_cluster)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Instance",
"multi": true,
"name": "instance",
"query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\"}, instance)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Logger",
"multi": true,
"name": "logger",
"query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\"}, logger)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Level",
"multi": true,
"name": "level",
"query": "label_values({job=~\"integrations/apache-hbase\",job=~\"$job\",hbase_cluster=~\"$hbase_cluster\",instance=~\"$instance\",logger=~\"$logger\"}, level)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"current": {
"selected": false,
"text": "",
"value": ""
},
"label": "Regex search",
"name": "regex_search",
"options": [
{
"selected": true,
"text": "",
"value": ""
}
],
"query": "",
"type": "textbox"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timezone": "utc",
"title": "Apache HBase logs overview",
"uid": "apache-hbase-logs-overview"
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,41 @@
groups:
- name: apache-http
rules:
- alert: ApacheDown
annotations:
description: Apache is down on {{ $labels.instance }}.
summary: Apache is down.
expr: apache_up == 0
for: 5m
labels:
severity: warning
- alert: ApacheRestart
annotations:
description: Apache has just been restarted on {{ $labels.instance }}.
summary: Apache restart.
expr: apache_uptime_seconds_total / 60 < 1
for: "0"
labels:
severity: info
- alert: ApacheWorkersLoad
annotations:
description: |
Apache busy workers are approaching the max workers count: more than 80% of workers are busy on {{ $labels.instance }}.
The current value is {{ $value }}%.
summary: Apache workers load is too high.
expr: |
(sum by (instance) (apache_workers{state="busy"}) / sum by (instance) (apache_scoreboard) ) * 100 > 80
for: 15m
labels:
severity: warning
- alert: ApacheResponseTimeTooHigh
annotations:
description: |
Apache average response time is above the threshold of 5000 ms on {{ $labels.instance }}.
The current value is {{ $value }} ms.
summary: Apache response time is too high.
expr: |
increase(apache_duration_ms_total[5m])/increase(apache_accesses_total[5m]) > 5000
for: 15m
labels:
severity: warning

View file

@ -0,0 +1,872 @@
{
"__inputs": [ ],
"__requires": [ ],
"annotations": {
"list": [ ]
},
"editable": false,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"id": null,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"apache-http-mixin"
],
"targetBlank": false,
"title": "Other Apache HTTP dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 1,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
}
},
"gridPos": {
"h": 3,
"w": 4,
"x": 0,
"y": 0
},
"id": 2,
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.5",
"targets": [
{
"expr": "apache_uptime_seconds_total{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"step": 240
}
],
"title": "Uptime",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 1,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
}
},
"gridPos": {
"h": 3,
"w": 4,
"x": 4,
"y": 0
},
"id": 3,
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {
"titleSize": 2
},
"textMode": "name"
},
"pluginVersion": "8.4.5",
"targets": [
{
"exemplar": false,
"expr": "apache_info{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"instant": true,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{ version }}",
"step": 240
}
],
"title": "Version",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "continuous-GrYlRd"
},
"custom": {
"fillOpacity": 70,
"lineWidth": 0,
"spanNulls": false
},
"mappings": [
{
"options": {
"0": {
"color": "red",
"index": 1,
"text": "Down"
},
"1": {
"color": "green",
"index": 0,
"text": "Up"
}
},
"type": "value"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"gridPos": {
"h": 3,
"w": 16,
"x": 8,
"y": 0
},
"id": 4,
"options": {
"alignValue": "left",
"legend": {
"displayMode": "list",
"placement": "right"
},
"mergeValues": false,
"rowHeight": 0.90000000000000002,
"showValue": "never",
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.4.5",
"targets": [
{
"exemplar": true,
"expr": "apache_up{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "Apache up",
"refId": "A",
"step": 240
}
],
"title": "Apache Up / Down",
"type": "state-timeline"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Bytes sent"
},
"properties": [
{
"id": "custom.axisPlacement",
"value": "right"
},
{
"id": "custom.drawStyle",
"value": "bars"
},
{
"id": "unit",
"value": "Bps"
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 1
},
"id": 5,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max",
"min"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.4.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"exemplar": false,
"expr": "rate(apache_accesses_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "Calls",
"refId": "A",
"step": 240
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"exemplar": false,
"expr": "rate(apache_sent_kilobytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) * 1000",
"hide": false,
"interval": "",
"legendFormat": "Bytes sent",
"refId": "B"
}
],
"title": "Load",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ms"
}
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 1
},
"id": 6,
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max",
"min"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.4.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"exemplar": false,
"expr": "increase(apache_duration_ms_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])/increase(apache_accesses_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "Average response time",
"refId": "A",
"step": 240
}
],
"title": "Response time",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 24,
"x": 0,
"y": 2
},
"id": 7,
"links": [ ],
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max",
"min"
],
"displayMode": "table",
"placement": "right",
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "8.4.5",
"targets": [
{
"expr": "apache_scoreboard{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ state }}",
"refId": "A",
"step": 240
}
],
"timeFrom": null,
"timeShift": null,
"title": "Apache scoreboard statuses",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
}
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 3
},
"id": 8,
"links": [ ],
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max",
"min"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.4.5",
"targets": [
{
"expr": "apache_workers{job=~\"$job\", instance=~\"$instance\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ state }}",
"step": 240
}
],
"title": "Apache worker statuses",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 3
},
"id": 9,
"links": [ ],
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max",
"min"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.4.5",
"targets": [
{
"expr": "apache_cpuload{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Load",
"refId": "A",
"step": 240
}
],
"title": "Apache CPU load",
"type": "timeseries"
}
],
"refresh": "1m",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"apache-http-mixin"
],
"templating": {
"list": [
{
"hide": 0,
"label": "Data source",
"name": "prometheus_datasource",
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": {
"text": "",
"value": ""
},
"datasource": "$prometheus_datasource",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(apache_up, job)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": "$prometheus_datasource",
"hide": 0,
"includeAll": false,
"label": "instance",
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(apache_up{job=~\"$job\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "default",
"title": "Apache HTTP server",
"uid": "apache-http",
"version": 0
}

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,52 @@
groups:
- name: apache-mesos
rules:
- alert: ApacheMesosHighMemoryUsage
annotations:
description: '{{ printf "%.0f" $value }} percent memory usage on {{$labels.mesos_cluster}},
which is above the threshold of 90.'
summary: There is a high memory usage for the cluster.
expr: |
min without(instance, job, type) (mesos_master_mem{type="percent"}) > 90
for: 5m
labels:
severity: warning
- alert: ApacheMesosHighDiskUsage
annotations:
description: '{{ printf "%.0f" $value }} percent disk usage on {{$labels.mesos_cluster}},
which is above the threshold of 90.'
summary: There is a high disk usage for the cluster.
expr: |
min without(instance, job, type) (mesos_master_disk{type="percent"}) > 90
for: 5m
labels:
severity: critical
- alert: ApacheMesosUnreachableTasks
annotations:
description: '{{ printf "%.0f" $value }} unreachable tasks on {{$labels.mesos_cluster}},
which is above the threshold of 3.'
summary: There is an unusually high number of unreachable tasks.
expr: |
max without(instance, job, state) (mesos_master_task_states_current{state="unreachable"}) > 3
for: 5m
labels:
severity: warning
- alert: ApacheMesosNoLeaderElected
annotations:
description: There is no cluster coordinator on {{$labels.mesos_cluster}}.
summary: There is currently no cluster coordinator.
expr: |
max without(instance, job) (mesos_master_elected) == 0
for: 1m
labels:
severity: critical
- alert: ApacheMesosInactiveAgents
annotations:
description: '{{ printf "%.0f" $value }} inactive agent clients over the last
5m, which is above the threshold of 1.'
summary: There are currently inactive agent clients.
expr: |
max without(instance, job, state) (mesos_master_slaves_state{state=~"connected_inactive|disconnected_inactive"}) > 1
for: 5m
labels:
severity: warning

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,93 @@
groups:
- name: apache-solr
rules:
- alert: ApacheSolrZookeeperChangeInEnsembleSize
annotations:
description: Zookeeper host {{$labels.zk_host}} has had an ensemble change of
{{ printf "%.0f" $value }} over the last 5 minutes
summary: Changes in the ZooKeeper ensemble size can affect the stability and
performance of the cluster.
expr: |
changes(solr_zookeeper_ensemble_size[5m]) > 0
for: 5m
labels:
severity: warning
- alert: ApacheSolrHighCPUUsageCritical
annotations:
description: '{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had
a system CPU load of {{ printf "%.0f" $value }}%, which is above the threshold
of 85.'
summary: High CPU load can indicate that Solr nodes are under heavy load, potentially
impacting performance.
expr: |
100 * sum without (base_url, item) (avg_over_time(solr_metrics_jvm_os_cpu_load{item="systemCpuLoad"}[5m])) > 85
for: 5m
labels:
severity: critical
- alert: ApacheSolrHighCPUUsageWarning
annotations:
description: '{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had
a system CPU load of {{ printf "%.0f" $value }}%, which is above the threshold
of 75.'
summary: High CPU load can indicate that Solr nodes are under heavy load, potentially
impacting performance.
expr: |
100 * sum without (base_url, item) (avg_over_time(solr_metrics_jvm_os_cpu_load{item="systemCpuLoad"}[5m])) > 75
for: 5m
labels:
severity: warning
- alert: ApacheSolrHighHeapMemoryUsageCritical
annotations:
description: |
{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had high memory usage of {{ printf "%.0f" $value }}%, which is above the threshold of 75.
summary: High heap memory usage can lead to garbage collection issues, out-of-memory
errors, and overall system instability.
expr: |
100 * sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="used"}) / clamp_min(sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="max"}), 1) > 75
for: 5m
labels:
severity: critical
- alert: ApacheSolrHighHeapMemoryUsageWarning
annotations:
description: |
{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had high memory usage of {{ printf "%.0f" $value }}%, which is above the threshold of 85.
summary: High heap memory usage can lead to garbage collection issues, out-of-memory
errors, and overall system instability.
expr: |
100 * sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="used"}) / clamp_min(sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="max"}), 1) > 85
for: 5m
labels:
severity: warning
- alert: ApacheSolrLowCacheHitRatio
annotations:
description: |
{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had a low cache hit ratio of {{ printf "%.0f" $value }}% on core {{$labels.core}} of type {{$labels.type}}, which is under the threshold of 75.
summary: Low cache hit ratios can lead to increased disk I/O and slower query
response times.
expr: |
100 * sum without(base_url, category, collection, item, replica, shard) (solr_metrics_core_searcher_cache_ratio{item="hitratio", type=~"documentCache|filterCache|queryResultCache"}) < 75
for: 10m
labels:
severity: warning
- alert: ApacheSolrHighCoreErrors
annotations:
description: |
{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had a high amount of core errors {{ printf "%.0f" $value }}% on core {{$labels.core}}, which is above the threshold of 15.
summary: A spike in core errors can indicate serious issues at the core level,
affecting data integrity and availability.
expr: |
100 * sum without(base_url, category, collection, handler, replica, shard) (increase(solr_metrics_core_errors_total[10m]) / clamp_min(avg_over_time(solr_metrics_core_errors_total[10m]), 1)) > 15
for: 10m
labels:
severity: warning
- alert: ApacheSolrHighDocumentIndexing
annotations:
description: |
{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had a high document indexing value of {{ printf "%.0f" $value }}% on core {{$labels.core}}, which is above the threshold of 30.
summary: A sudden spike in document indexing could indicate unintended or malicious
bulk updates.
expr: |
100 * sum without(base_url, category, collection, handler, replica, shard) (increase(solr_metrics_core_update_handler_adds_total[15m]) / clamp_min(avg_over_time(solr_metrics_core_update_handler_adds_total[15m]), 1)) > 30
for: 15m
labels:
severity: warning

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,325 @@
{
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"apache-solr-mixin"
],
"targetBlank": false,
"title": "Other Apache Solr dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Logs volume grouped by \"level\" label.",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "bars",
"fillOpacity": 50,
"stacking": {
"mode": "normal"
}
},
"unit": "none"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "purple",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(E|e)(rr.*|RR.*)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(W|w)(arn.*|ARN.*|rn|RN)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(T|t)(race|RACE)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "light-blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "logs"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "text",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 6,
"w": 24
},
"id": 1,
"interval": "30s",
"options": {
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "v10.0.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "sum by (level) (count_over_time({job=~\"integrations/apache-solr\",job=~\"$job\",solr_cluster=~\"$solr_cluster\",instance=~\"$instance\",level=~\"$level\",filename=~\"$filename\"}\n|~ \"$regex_search\"\n\n[$__interval]))\n",
"legendFormat": "{{ level }}"
}
],
"title": "Logs volume",
"transformations": [
{
"id": "renameByRegex",
"options": {
"regex": "Value",
"renamePattern": "logs"
}
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 18,
"w": 24
},
"id": 2,
"options": {
"dedupStrategy": "exact",
"enableLogDetails": true,
"prettifyLogMessage": true,
"showTime": false,
"wrapLogMessage": true
},
"pluginVersion": "v10.0.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "{job=~\"integrations/apache-solr\",job=~\"$job\",solr_cluster=~\"$solr_cluster\",instance=~\"$instance\",level=~\"$level\",filename=~\"$filename\"} \n|~ \"$regex_search\"\n\n\n"
}
],
"title": "Logs",
"type": "logs"
}
],
"refresh": "1m",
"schemaVersion": 36,
"tags": [
"apache-solr-mixin"
],
"templating": {
"list": [
{
"label": "Loki data source",
"name": "loki_datasource",
"query": "loki",
"regex": "(?!grafanacloud.+usage-insights|grafanacloud.+alert-state-history).+",
"type": "datasource"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"query": "label_values({job=~\"integrations/apache-solr\"}, job)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Solr cluster",
"multi": true,
"name": "solr_cluster",
"query": "label_values({job=~\"integrations/apache-solr\",job=~\"$job\"}, solr_cluster)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Instance",
"multi": true,
"name": "instance",
"query": "label_values({job=~\"integrations/apache-solr\",job=~\"$job\",solr_cluster=~\"$solr_cluster\"}, instance)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Level",
"multi": true,
"name": "level",
"query": "label_values({job=~\"integrations/apache-solr\",job=~\"$job\",solr_cluster=~\"$solr_cluster\",instance=~\"$instance\"}, level)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Filename",
"multi": true,
"name": "filename",
"query": "label_values({job=~\"integrations/apache-solr\",job=~\"$job\",solr_cluster=~\"$solr_cluster\",instance=~\"$instance\",level=~\"$level\"}, filename)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"current": {
"selected": false,
"text": "",
"value": ""
},
"label": "Regex search",
"name": "regex_search",
"options": [
{
"selected": true,
"text": "",
"value": ""
}
],
"query": "",
"type": "textbox"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timezone": "utc",
"title": "Apache Solr logs",
"uid": "apache-solr-logs-overview"
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,47 @@
groups:
# Alerting rules for Apache Tomcat. Every rule evaluates per (job, instance)
# and must hold for 5 minutes before firing.
- name: ApacheTomcatAlerts
  rules:
  # JVM process CPU load above 80.
  # NOTE(review): the "Apache Tomcat overview" dashboard plots
  # jvm_process_cpu_load with unit "percentunit" (a 0-1 scale). If the metric
  # really is a 0-1 fraction, `> 80` can never be true and the threshold should
  # be 0.8 (or the expression multiplied by 100) - confirm the metric's scale.
  - alert: ApacheTomcatAlertsHighCpuUsage
    annotations:
      description: The CPU usage has been at {{ printf "%.0f" $value }} percent over
        the last 5 minutes on {{$labels.instance}}, which is above the threshold of
        80 percent.
      summary: The instance has a CPU usage higher than the configured threshold.
    expr: |
      sum by (job, instance) (jvm_process_cpu_load{job=~"integrations/tomcat"}) > 80
    for: 5m
    labels:
      severity: critical
  # Used JVM memory as a percentage of physical memory, above 80 percent.
  - alert: ApacheTomcatAlertsHighMemoryUsage
    annotations:
      description: The memory usage has been at {{ printf "%.0f" $value }} percent
        over the last 5 minutes on {{$labels.instance}}, which is above the threshold
        of 80 percent.
      summary: The instance has a higher memory usage than the configured threshold.
    expr: |
      sum(jvm_memory_usage_used_bytes{job=~"integrations/tomcat"}) by (job, instance) / sum(jvm_physical_memory_bytes{job=~"integrations/tomcat"}) by (job, instance) * 100 > 80
    for: 5m
    labels:
      severity: critical
  # Share of requests that errored over the last 5 minutes, above 5 percent.
  # Errors and requests are summed per instance BEFORE dividing: summing the
  # per-connector ratios would add percentages together (two connectors at 3%
  # would report 6%) and a connector with no traffic (0/0 = NaN) would turn the
  # whole sum into NaN, silently suppressing the alert.
  - alert: ApacheTomcatAlertsHighRequestErrorPercent
    annotations:
      description: The percentage of request errors has been at {{ printf "%.0f" $value
        }} percent over the last 5 minutes on {{$labels.instance}}, which is above
        the threshold of 5 percent.
      summary: There are a high number of request errors.
    expr: |
      sum by (job, instance) (increase(tomcat_errorcount_total[5m])) / sum by (job, instance) (increase(tomcat_requestcount_total[5m])) * 100 > 5
    for: 5m
    labels:
      severity: critical
  # Average processing time per request over the last 5 minutes, above 300ms.
  # Same ratio-of-sums form as the error-percent rule, for the same reasons:
  # it yields the true per-instance average and is not poisoned by idle
  # connectors. With no requests at all the result is NaN and nothing fires.
  - alert: ApacheTomcatAlertsModeratelyHighProcessingTime
    annotations:
      description: The processing time has been at {{ printf "%.0f" $value }}ms over
        the last 5 minutes on {{$labels.instance}}, which is above the threshold of
        300ms.
      summary: The processing time has been moderately high.
    expr: |
      sum by (job, instance) (increase(tomcat_processingtime_total[5m])) / sum by (job, instance) (increase(tomcat_requestcount_total[5m])) > 300
    for: 5m
    labels:
      severity: warning

View file

@ -0,0 +1,677 @@
{
"__inputs": [ ],
"__requires": [ ],
"annotations": {
"list": [ ]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"apache-tomcat-mixin"
],
"targetBlank": false,
"title": "Other Apache Tomcat dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The number of different types of sessions created for a Tomcat host",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(increase(tomcat_session_sessioncounter_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)) by (job, instance)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total sessions"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(increase(tomcat_session_rejectedsessions_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)) by (job, instance)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - rejected"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(increase(tomcat_session_expiredsessions_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)) by (job, instance)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - expired"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(tomcat_session_sessioncounter_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{host}}{{context}} - sessions"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(tomcat_session_rejectedsessions_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{host}}{{context}} - rejected"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(tomcat_session_expiredsessions_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{host}}{{context}} - expired"
}
],
"title": "Sessions",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The average time taken to process recent sessions for a Tomcat host",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ms"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 0
},
"id": 3,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(increase(tomcat_session_processingtime_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_session_sessioncounter_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval), 1)) by (job, instance)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(tomcat_session_processingtime_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_session_sessioncounter_total{job=~\"$job\", instance=~\"$instance\", host=~\"$host\", context=~\"$context\"}[$__interval:] offset -$__interval), 1)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{host}}{{context}}"
}
],
"title": "Session processing time",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"uid": "${prometheus_datasource}"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 10
},
"id": 4,
"targets": [ ],
"title": "Servlet",
"type": "row"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The total requests and errors for a Tomcat servlet",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "r/s"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 11
},
"id": 5,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(rate(tomcat_servlet_requestcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__rate_interval])) by (job, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total requests"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(rate(tomcat_servlet_errorcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__rate_interval])) by (job, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total errors"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "rate(tomcat_servlet_requestcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{module}}{{servlet}} - requests"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "rate(tomcat_servlet_errorcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{module}}{{servlet}} - errors"
}
],
"title": "Servlet requests",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The average time taken to process recent requests in a Tomcat servlet",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ms"
},
"overrides": [ ]
},
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 11
},
"id": 6,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(increase(tomcat_servlet_processingtime_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_servlet_requestcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__interval:] offset -$__interval), 1)) by (job, instance)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(tomcat_servlet_processingtime_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_servlet_requestcount_total{instance=~\"$instance\", job=~\"$job\", module=~\"$host$context\", servlet=~\"$servlet\"}[$__interval:] offset -$__interval), 1)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{module}}{{servlet}}"
}
],
"title": "Servlet processing time",
"type": "timeseries"
}
],
"refresh": "1m",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"apache-tomcat-mixin"
],
"templating": {
"list": [
{
"current": { },
"hide": 0,
"label": "Data Source",
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": false,
"label": "Job",
"multi": false,
"name": "job",
"options": [ ],
"query": "label_values(tomcat_bytesreceived_total, job)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": false,
"label": "Instance",
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(tomcat_bytesreceived_total, instance)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Host",
"multi": true,
"name": "host",
"options": [ ],
"query": "label_values(tomcat_session_sessioncounter_total{instance=~\"$instance\"}, host)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Context",
"multi": true,
"name": "context",
"options": [ ],
"query": "label_values(tomcat_session_sessioncounter_total{host=~\"$host\"}, context)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Servlet",
"multi": true,
"name": "servlet",
"options": [ ],
"query": "label_values(tomcat_servlet_requestcount_total{module=~\"$host$context\"}, servlet)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "default",
"title": "Apache Tomcat hosts",
"uid": "apache-tomcat-hosts",
"version": 0
}

View file

@ -0,0 +1,979 @@
{
"__inputs": [ ],
"__requires": [ ],
"annotations": {
"list": [ ]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"apache-tomcat-mixin"
],
"targetBlank": false,
"title": "Other Apache Tomcat dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The memory usage of the JVM of the instance",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "jvm_memory_usage_used_bytes{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{area}}"
}
],
"title": "Memory usage",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The CPU usage of the JVM process",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 0
},
"id": 3,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "jvm_process_cpu_load{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}"
}
],
"title": "CPU usage",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The sent traffic for a Tomcat connector",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Bps"
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 6
},
"id": 4,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(rate(tomcat_bytessent_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])) by (job, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "rate(tomcat_bytessent_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{protocol}}-{{port}}"
}
],
"title": "Traffic sent",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The received traffic for a Tomcat connector",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Bps"
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 6
},
"id": 5,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(rate(tomcat_bytesreceived_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])) by (job, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "rate(tomcat_bytesreceived_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{protocol}}-{{port}}"
}
],
"title": "Traffic received",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The total requests and errors for a Tomcat connector",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "r/s"
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 12
},
"id": 6,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(rate(tomcat_requestcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])) by (job, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total requests"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(rate(tomcat_errorcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])) by (job, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total errors"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "rate(tomcat_requestcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - requests"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "rate(tomcat_errorcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - errors"
}
],
"title": "Requests",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The average time taken to process recent requests for a Tomcat connector",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 300
}
]
},
"unit": "ms"
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 12
},
"id": 7,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(increase(tomcat_processingtime_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_requestcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__interval:] offset -$__interval), 1)) by (job, instance)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "increase(tomcat_processingtime_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__interval:] offset -$__interval) / clamp_min(increase(tomcat_requestcount_total{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}[$__interval:] offset -$__interval), 1)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{protocol}}-{{port}}"
}
],
"title": "Processing time",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "The number of various threads being used by a Tomcat connector",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 18
},
"id": 8,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(tomcat_threadpool_connectioncount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}) by (job, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - total connections"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(tomcat_threadpool_pollerthreadcount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}) by (job, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - poller total"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(tomcat_threadpool_keepalivecount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}) by (job, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - idle total"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "sum(tomcat_threadpool_currentthreadcount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}) by (job, instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - active total"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "tomcat_threadpool_connectioncount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - connections"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "tomcat_threadpool_pollerthreadcount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - poller"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "tomcat_threadpool_keepalivecount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - idle"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"expr": "tomcat_threadpool_currentthreadcount{job=~\"$job\", instance=~\"$instance\", protocol=~\"$protocol\", port=~\"$port\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} - {{protocol}}-{{port}} - active"
}
],
"title": "Threads",
"type": "timeseries"
},
{
"datasource": {
"uid": "${loki_datasource}"
},
"description": "Recent logs from the catalina.out log file\n",
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 24
},
"id": 9,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"uid": "${loki_datasource}"
},
"editorMode": "code",
"expr": "{filename=~\"/var/log/tomcat.*/catalina.out|/opt/tomcat/logs/catalina.out|/Program Files/Apache Software Foundation/Tomcat .*..*/logs/catalina.out\",job=~\"$job\", instance=~\"$instance\"} |= ``",
"queryType": "range",
"refId": "A"
}
],
"title": "Logs",
"type": "logs"
}
],
"refresh": "1m",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"apache-tomcat-mixin"
],
"templating": {
"list": [
{
"current": { },
"hide": 0,
"label": "Data Source",
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"current": { },
"hide": 0,
"label": "Loki Datasource",
"name": "loki_datasource",
"options": [ ],
"query": "loki",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": false,
"label": "Job",
"multi": false,
"name": "job",
"options": [ ],
"query": "label_values(tomcat_bytesreceived_total, job)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": false,
"label": "Instance",
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(tomcat_bytesreceived_total, instance)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Protocol",
"multi": true,
"name": "protocol",
"options": [ ],
"query": "label_values(tomcat_bytesreceived_total, protocol)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "Port",
"multi": true,
"name": "port",
"options": [ ],
"query": "label_values(tomcat_bytesreceived_total, port)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "default",
"title": "Apache Tomcat overview",
"uid": "apache-tomcat-overview",
"version": 0
}

View file

@ -0,0 +1 @@
null

30
assets/argocd/alerts.yaml Normal file
View file

@ -0,0 +1,30 @@
groups:
- name: ArgoCD
rules:
- alert: ArgoAppOutOfSync
annotations:
description: Application {{ $labels.name }} has sync status as {{ $labels.sync_status
}}.
summary: Application is OutOfSync.
expr: argocd_app_info{sync_status="OutOfSync"} == 1
for: 1m
labels:
severity: warning
- alert: ArgoAppSyncFailed
annotations:
description: Application {{ $labels.name }} has sync phase as {{ $labels.phase
}}.
summary: Application Sync Failed.
expr: argocd_app_sync_total{phase!="Succeeded"} == 1
for: 1m
labels:
severity: warning
- alert: ArgoAppMissing
annotations:
description: "ArgoCD has not reported any applications data for the past 15
minutes which means that it must be down or not functioning properly. \n"
summary: No reported applications in ArgoCD.
expr: absent(argocd_app_info)
for: 15m
labels:
severity: critical

File diff suppressed because it is too large Load diff

1
assets/argocd/rules.yaml Normal file
View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,47 @@
groups:
- name: AsteriskAlerts
rules:
- alert: AsteriskRestarted
annotations:
description: |-
Asterisk instance restarted in the last minute
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Asterisk instance restarted in the last minute.
expr: asterisk_core_uptime_seconds < 60
for: 5s
labels:
severity: critical
- alert: AsteriskReloaded
annotations:
description: |-
Asterisk instance reloaded in the last minute
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Asterisk instance reloaded in the last minute.
expr: asterisk_core_last_reload_seconds < 60
for: 5s
labels:
severity: warning
- alert: AsteriskHighScrapeTime
annotations:
description: |-
Asterisk instance core high scrape time (Possible system performance degradation)
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Asterisk instance core high scrape time.
expr: asterisk_core_scrape_time_ms > 100
for: 10s
labels:
severity: critical
- alert: AsteriskHighActiveCallsCount
annotations:
description: |-
Asterisk high active call count
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Asterisk high active call count.
expr: asterisk_calls_count > 100
for: 10s
labels:
severity: warning

View file

@ -0,0 +1,778 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [ ],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 2,
"iteration": 1645648005559,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": false,
"keepTime": true,
"tags": [
"asterisk-integration"
],
"targetBlank": false,
"title": "Asterisk Dashboards",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 2
},
"id": 8,
"panels": [ ],
"title": "Logs Overview",
"type": "row"
},
{
"gridPos": {
"h": 4,
"w": 4,
"x": 0,
"y": 3
},
"id": 16,
"options": {
"content": "<img style=\"margin: 20px;\" src=\"https://storage.googleapis.com/grafanalabs-integration-logos/asterisk.png\">",
"mode": "html"
},
"pluginVersion": "8.4.0",
"type": "text"
},
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Total number of log lines",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "super-light-blue",
"mode": "fixed"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 4,
"w": 3,
"x": 4,
"y": 3
},
"id": 6,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"}[$__interval])",
"refId": "A"
}
],
"title": "Total Log Lines",
"type": "stat"
},
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Total number of log lines with the type \"WARNING\"",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "orange",
"mode": "fixed"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 4,
"w": 3,
"x": 7,
"y": 3
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"} |= \"WARNING\" [$__interval])",
"refId": "A"
}
],
"title": "Warnings",
"type": "stat"
},
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Total number of log lines with the type \"ERROR\"",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "red",
"mode": "fixed"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 4,
"w": 3,
"x": 10,
"y": 3
},
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"} |= \"ERROR\" [$__interval])",
"refId": "A"
}
],
"title": "Errors",
"type": "stat"
},
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Percentage of log lines with the type \"ERROR\" out of all log lines",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "red",
"mode": "thresholds"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "super-light-red",
"value": null
},
{
"color": "light-red",
"value": 25
},
{
"color": "dark-red",
"value": 50
}
]
},
"unit": "percent"
},
"overrides": [ ]
},
"gridPos": {
"h": 4,
"w": 4,
"x": 13,
"y": 3
},
"id": 13,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "( count_over_time({job=\"$job\", filename=\"$filename\"} |= \"ERROR\" [$__interval]) / count_over_time({job=\"$job\", filename=\"$filename\"} [$__interval]) )",
"refId": "A"
}
],
"title": "Error Percentage",
"type": "stat"
},
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Total number of bytes used by the log file in the time period",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "purple",
"mode": "fixed"
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "super-light-red",
"value": null
}
]
},
"unit": "bytes"
},
"overrides": [ ]
},
"gridPos": {
"h": 4,
"w": 4,
"x": 17,
"y": 3
},
"id": 14,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "8.4.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "bytes_over_time({job=\"$job\", filename=\"$filename\"} [$__interval])",
"refId": "A"
}
],
"title": "Bytes Used",
"type": "stat"
},
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "fixed"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"fillOpacity": 50,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID",
"options": "A"
},
"properties": [
{
"id": "displayName",
"value": "Lines"
},
{
"id": "color",
"value": {
"fixedColor": "super-light-blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byFrameRefID",
"options": "B"
},
"properties": [
{
"id": "displayName",
"value": "Warnings"
},
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byFrameRefID",
"options": "C"
},
"properties": [
{
"id": "displayName",
"value": "Errors"
},
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 7
},
"id": 12,
"interval": "10s",
"options": {
"barRadius": 0.25,
"barWidth": 0.69999999999999996,
"groupWidth": 0.5,
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom"
},
"orientation": "auto",
"showValue": "never",
"stacking": "none",
"tooltip": {
"mode": "multi",
"sort": "none"
},
"xTickLabelRotation": 0,
"xTickLabelSpacing": 100
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"}[$__interval])",
"refId": "A"
},
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"} |= \"WARNING\" [$__interval])",
"hide": false,
"refId": "B"
},
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "count_over_time({job=\"$job\", filename=\"$filename\"} |= \"ERROR\" [$__interval])",
"hide": false,
"refId": "C"
}
],
"title": "Historical Logs / Warnings / Errors",
"type": "barchart"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 14
},
"id": 20,
"panels": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Errors from the log file",
"gridPos": {
"h": 12,
"w": 24,
"x": 0,
"y": 13
},
"id": 21,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": true
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "{job=~\"$job\", filename=~\"$filename\"} |= \"ERROR\"",
"refId": "A"
}
],
"title": "Errors",
"type": "logs"
}
],
"title": "Errors",
"type": "row"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 15
},
"id": 18,
"panels": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Warnings from the log file",
"gridPos": {
"h": 12,
"w": 24,
"x": 0,
"y": 14
},
"id": 22,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": true
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "{job=~\"$job\", filename=~\"$filename\"} |= \"WARNING\"",
"refId": "A"
}
],
"title": "Warnings",
"type": "logs"
}
],
"title": "Warnings",
"type": "row"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 16
},
"id": 10,
"panels": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "*Asterisk Full Log File* The \"full\" log is the most detailed, describing each call in great detail.",
"gridPos": {
"h": 12,
"w": 24,
"x": 0,
"y": 16
},
"id": 2,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": true
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "{job=~\"$job\", filename=~\"$filename\"}",
"refId": "A"
}
],
"title": "Full Log File",
"type": "logs"
}
],
"title": "Complete Log File",
"type": "row"
}
],
"refresh": "30s",
"schemaVersion": 35,
"style": "dark",
"tags": [
"asterisk-integration"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Loki",
"value": "Loki"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "loki_datasource",
"options": [ ],
"query": "loki",
"refresh": 1,
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": "integrations/asterisk-logs",
"value": "integrations/asterisk-logs"
},
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"definition": "label_values(job)",
"hide": 0,
"includeAll": true,
"label": "Job",
"multi": false,
"name": "job",
"options": [ ],
"query": "label_values(job)",
"refresh": 1,
"regex": "^.*asterisk.*",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": false,
"text": "/var/log/asterisk/full",
"value": "/var/log/asterisk/full"
},
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"definition": "label_values(filename)",
"hide": 0,
"includeAll": false,
"label": "File Name",
"multi": false,
"name": "filename",
"options": [ ],
"query": "label_values(filename)",
"refresh": 1,
"regex": ".*asterisk.+",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": { },
"timezone": "",
"title": "Asterisk - Logs",
"uid": "integration_asterisk_logs",
"version": 3,
"weekStart": ""
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

1
assets/awx/alerts.yaml Normal file
View file

@ -0,0 +1 @@
null

File diff suppressed because it is too large Load diff

1
assets/awx/rules.yaml Normal file
View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,37 @@
groups:
- name: blackbox-exporter.rules
rules:
- alert: BlackboxProbeFailed
annotations:
dashboard_url: https://grafana.com/d/blackbox-exporter-j4da/blackbox-exporter?instance={{
$labels.instance }}
description: The probe failed for the instance {{ $labels.instance }}.
summary: Probe has failed for the past 1m interval.
expr: |
probe_success{job="blackbox-exporter"} == 0
for: 1m
labels:
severity: critical
- alert: BlackboxLowUptime30d
annotations:
dashboard_url: https://grafana.com/d/blackbox-exporter-j4da/blackbox-exporter?instance={{
$labels.instance }}
description: The probe has a lower uptime than 99.9% over the last 30 days for the
instance {{ $labels.instance }}.
summary: Probe uptime is lower than 99.9% for the last 30 days.
expr: |
avg_over_time(probe_success{job="blackbox-exporter"}[30d]) * 100 < 99.900000000000006
labels:
severity: info
- alert: BlackboxSslCertificateWillExpireSoon
annotations:
dashboard_url: https://grafana.com/d/blackbox-exporter-j4da/blackbox-exporter?instance={{
$labels.instance }}
description: |
The SSL certificate of the instance {{ $labels.instance }} is expiring within 21 days.
Actual time left: {{ $value | humanizeDuration }}.
summary: SSL certificate will expire soon.
expr: |
probe_ssl_earliest_cert_expiry{job="blackbox-exporter"} - time() < 21 * 24 * 3600
labels:
severity: warning

View file

@ -0,0 +1,947 @@
{
"__inputs": [ ],
"__requires": [ ],
"description": "A dashboard that monitors the Blackbox-exporter. It is created using the blackbox-exporter-mixin for the [blackbox-exporter](https://github.com/prometheus/blackbox-exporter).",
"editable": true,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"title": "Summary",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"links": [
{
"targetBlank": true,
"title": "Go To Probe",
"type": "link",
"url": "d/blackbox-exporter-j4da/blackbox-exporter?var-instance=${__field.labels.instance}&var-job=${__field.labels.job}"
}
],
"mappings": [
{
"options": {
"0": {
"color": "red",
"text": "Down"
},
"1": {
"color": "green",
"text": "Up"
}
},
"type": "value"
}
],
"unit": "short"
}
},
"gridPos": {
"h": 5,
"w": 24,
"x": 0,
"y": 1
},
"id": 2,
"maxDataPoints": 100,
"options": {
"colorMode": "background",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
},
"text": {
"titleSize": 18,
"valueSize": 18
},
"textMode": "value_and_name"
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_success{\n job=~\"$job\"\n}\n",
"legendFormat": "{{instance}}"
}
],
"title": "Status Map",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "green",
"value": 0.001
}
]
},
"unit": "short"
}
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 6
},
"id": 3,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "count(\n probe_success{\n job=~\"$job\"\n }\n)\n"
}
],
"title": "Probes",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "yellow",
"value": 0.98999999999999999
},
{
"color": "green",
"value": 0.999
}
]
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 6
},
"id": 4,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "(\n count(\n probe_success{\n job=~\"$job\"\n } == 1\n )\n OR vector(0)\n) /\ncount(\n probe_success{\n job=~\"$job\"\n }\n)\n"
}
],
"title": "Probes Success",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "green",
"value": 0.999
}
]
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 6
},
"id": 5,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "count(\n probe_http_ssl{\n job=~\"$job\"\n } == 1\n) /\ncount(\n probe_http_version{\n job=~\"$job\"\n }\n)\n"
}
],
"title": "Probes SSL",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "s"
}
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 6
},
"id": 6,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "avg(\n probe_duration_seconds{\n job=~\"$job\"\n }\n)\n"
}
],
"title": "Probe Average Duration",
"type": "stat"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 10
},
"id": 7,
"repeat": "instance",
"title": "$instance",
"type": "row"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "yellow",
"value": 0.98999999999999999
},
{
"color": "green",
"value": 0.999
}
]
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 11
},
"id": 8,
"options": {
"colorMode": "background",
"reduceOptions": {
"calcs": [
"mean"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_success{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n"
}
],
"title": "Uptime",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"mappings": [
{
"options": {
"0": {
"color": "red",
"text": "No"
},
"1": {
"color": "green",
"text": "Yes"
}
},
"type": "value"
}
],
"unit": "short"
}
},
"gridPos": {
"h": 3,
"w": 3,
"x": 0,
"y": 15
},
"id": 9,
"options": {
"colorMode": "background",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_success{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
"instant": true
}
],
"title": "Probe Success",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "blue",
"value": 300
},
{
"color": "yellow",
"value": 400
},
{
"color": "red",
"value": 500
}
]
},
"unit": "short"
}
},
"gridPos": {
"h": 3,
"w": 3,
"x": 3,
"y": 15
},
"id": 10,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_http_status_code{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
"instant": true
}
],
"title": "Latest Response Code",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"mappings": [
{
"options": {
"0": {
"color": "red",
"text": "No"
},
"1": {
"color": "green",
"text": "Yes"
}
},
"type": "value"
}
],
"unit": "short"
}
},
"gridPos": {
"h": 3,
"w": 3,
"x": 0,
"y": 18
},
"id": 11,
"options": {
"colorMode": "background",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_http_ssl{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
"instant": true
}
],
"title": "SSL",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "green",
"value": 1
}
]
},
"unit": "short"
}
},
"gridPos": {
"h": 3,
"w": 3,
"x": 3,
"y": 18
},
"id": 12,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
},
"textMode": "name"
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_tls_version_info{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
"instant": true,
"legendFormat": "{{version}}"
}
],
"title": "SSL Version",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "green",
"value": 1814400
}
]
},
"unit": "s"
}
},
"gridPos": {
"h": 3,
"w": 6,
"x": 0,
"y": 21
},
"id": 13,
"options": {
"colorMode": "background",
"graphMode": "none"
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_ssl_earliest_cert_expiry{\n job=~\"$job\",\n instance=~\"$instance\"\n} - time()\n"
}
],
"title": "SSL Certificate Expiry",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"mappings": [
{
"options": {
"0": {
"color": "green",
"text": "No"
},
"1": {
"color": "blue",
"text": "Yes"
}
},
"type": "value"
}
],
"unit": "short"
}
},
"gridPos": {
"h": 3,
"w": 3,
"x": 0,
"y": 24
},
"id": 14,
"options": {
"colorMode": "background",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_http_redirects{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
"instant": true
}
],
"title": "Redirects",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "short"
}
},
"gridPos": {
"h": 3,
"w": 3,
"x": 3,
"y": 24
},
"id": 15,
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_http_version{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n",
"instant": true,
"legendFormat": "{{version}}"
}
],
"title": "HTTP Version",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "s"
}
},
"gridPos": {
"h": 4,
"w": 3,
"x": 0,
"y": 27
},
"id": 16,
"options": {
"reduceOptions": {
"calcs": [
"mean"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_duration_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n"
}
],
"title": "Average Latency",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"unit": "s"
}
},
"gridPos": {
"h": 4,
"w": 3,
"x": 3,
"y": 27
},
"id": 17,
"options": {
"reduceOptions": {
"calcs": [
"mean"
]
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "probe_dns_lookup_time_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n}\n"
}
],
"title": "Average DNS Lookup",
"type": "stat"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 10,
"spanNulls": false
},
"unit": "s"
}
},
"gridPos": {
"h": 10,
"w": 18,
"x": 6,
"y": 11
},
"id": 18,
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n probe_http_duration_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n }\n) by (instance)\n",
"legendFormat": "HTTP duration"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n probe_duration_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n }\n) by (instance)\n",
"legendFormat": "Total probe duration"
}
],
"title": "Probe Duration",
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"fieldConfig": {
"defaults": {
"custom": {
"fillOpacity": 100,
"spanNulls": false,
"stacking": {
"mode": "percent"
}
},
"unit": "s"
}
},
"gridPos": {
"h": 10,
"w": 18,
"x": 6,
"y": 21
},
"id": 19,
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Mean",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "v10.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n probe_http_duration_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n }\n) by (phase)\n",
"legendFormat": "{{ phase }}"
},
{
"datasource": {
"type": "prometheus",
"uid": "$datasource"
},
"expr": "sum(\n probe_icmp_duration_seconds{\n job=~\"$job\",\n instance=~\"$instance\"\n }\n) by (phase)\n",
"legendFormat": "{{ phase }}"
}
],
"title": "Probe Phases",
"type": "timeseries"
}
],
"schemaVersion": 36,
"tags": [
"blackbox-exporter",
"blackbox-exporter-mixin"
],
"templating": {
"list": [
{
"label": "Data source",
"name": "datasource",
"query": "prometheus",
"type": "datasource"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"query": "label_values(probe_success{}, job)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"includeAll": false,
"label": "Instance",
"multi": false,
"name": "instance",
"query": "label_values(probe_success{job=~\"$job\"}, instance)",
"refresh": 2,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-2d",
"to": "now"
},
"timezone": "utc",
"title": "Blackbox Exporter",
"uid": "blackbox-exporter-j4da"
}

View file

@ -0,0 +1 @@
null

1
assets/caddy/alerts.yaml Normal file
View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,761 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"limit": 100,
"name": "Annotations & Alerts",
"showIn": 0,
"target": {
"limit": 100,
"matchAny": false,
"tags": [ ],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": 13460,
"graphTooltip": 0,
"id": 10,
"iteration": 1633116262227,
"links": [ ],
"panels": [
{
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 11,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"expr": "sum(rate(caddy_http_requests_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (handler)",
"interval": "",
"legendFormat": "{{handler}}",
"refId": "A"
}
],
"title": "Requests",
"type": "timeseries"
},
{
"datasource": {
"uid": "$datasource"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 11,
"w": 12,
"x": 12,
"y": 0
},
"id": 7,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"expr": "sum(irate(caddy_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (code)",
"interval": "",
"legendFormat": "{{code}}",
"refId": "A"
}
],
"title": "Requests by Response Code",
"type": "timeseries"
},
{
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 11
},
"id": 8,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"expr": "avg(avg_over_time(caddy_http_requests_in_flight{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (handler)",
"hide": false,
"interval": "",
"legendFormat": "{{handler}}",
"refId": "E"
}
],
"title": "Requests In Flight",
"type": "timeseries"
},
{
"datasource": {
"uid": "$datasource"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": [ ]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 11
},
"id": 5,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"expr": "sum(irate(caddy_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (code)",
"interval": "",
"legendFormat": "{{code}}",
"refId": "A"
}
],
"title": "Requests by Response Code (%)",
"type": "timeseries"
},
{
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"log": 2,
"type": "log"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": [ ]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 20
},
"id": 4,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"expr": "histogram_quantile(0.99, sum(rate(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le))",
"interval": "",
"legendFormat": "p99",
"refId": "A"
},
{
"datasource": {
"uid": "$datasource"
},
"expr": "histogram_quantile(0.95, sum(rate(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le))",
"interval": "",
"legendFormat": "p95",
"refId": "B"
},
{
"datasource": {
"uid": "$datasource"
},
"expr": "histogram_quantile(0.90, sum(rate(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le))",
"interval": "",
"legendFormat": "p90",
"refId": "C"
},
{
"datasource": {
"uid": "$datasource"
},
"expr": "histogram_quantile(0.75, sum(rate(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le))",
"interval": "",
"legendFormat": "p75",
"refId": "D"
},
{
"datasource": {
"uid": "$datasource"
},
"expr": "histogram_quantile(0.5, sum(rate(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le))",
"interval": "",
"legendFormat": "p50",
"refId": "E"
}
],
"title": "Request Duration (percentile)",
"type": "timeseries"
},
{
"cards": { },
"color": {
"cardColor": "#b4ff00",
"colorScale": "linear",
"colorScheme": "interpolateInferno",
"exponent": 0.5,
"mode": "spectrum"
},
"dataFormat": "tsbuckets",
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": [ ]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 20
},
"heatmap": { },
"hideZeroBuckets": true,
"highlightCards": true,
"id": 6,
"interval": "",
"legend": {
"show": true
},
"maxDataPoints": 25,
"options": {
"calculate": false,
"calculation": { },
"cellGap": 2,
"cellValues": { },
"color": {
"exponent": 0.5,
"fill": "#b4ff00",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Inferno",
"steps": 128
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1.0000000000000001e-09
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"showValue": "never",
"tooltip": {
"show": true,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "s"
}
},
"pluginVersion": "10.2.0",
"reverseYBuckets": false,
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"expr": "sum(increase(caddy_http_request_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (le)",
"format": "heatmap",
"interval": "",
"legendFormat": "{{le}}",
"refId": "A"
}
],
"title": "Request Duration (heatmap)",
"tooltip": {
"show": true,
"showHistogram": false
},
"type": "heatmap",
"xAxis": {
"show": true
},
"yAxis": {
"format": "s",
"logBase": 1,
"show": true
},
"yBucketBound": "auto"
}
],
"refresh": "30s",
"schemaVersion": 38,
"style": "dark",
"tags": [
"caddy-integration"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Prometheus",
"value": "Prometheus"
},
"hide": 0,
"includeAll": false,
"label": "Data source",
"multi": false,
"name": "datasource",
"options": [ ],
"query": "prometheus",
"queryValue": "",
"refresh": 1,
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": ".+",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"uid": "$datasource"
},
"definition": "",
"hide": 0,
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(caddy_http_requests_total, job)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"uid": "$datasource"
},
"definition": "label_values(caddy_http_requests_total{job=~\"$job\"}, instance)",
"hide": 0,
"includeAll": true,
"label": "Instance",
"multi": true,
"name": "instance",
"options": [ ],
"query": {
"qryType": 1,
"query": "label_values(caddy_http_requests_total{job=~\"$job\"}, instance)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Caddy Overview",
"uid": "9B0qPnfMz",
"version": 9
}

1
assets/caddy/rules.yaml Normal file
View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,226 @@
groups:
- name: Cilium Endpoints
rules:
- alert: CiliumAgentEndpointFailures
annotations:
description: Cilium Agent {{$labels.pod}} has endpoints that are in an invalid
state. This may result in problems with scheduling Pods, or network connectivity
issues.
summary: Cilium Agent endpoints in the invalid state.
expr: sum(cilium_endpoint_state{endpoint_state="invalid"}) by (pod)
for: 5m
labels:
severity: warning
- alert: CiliumAgentEndpointUpdateFailure
annotations:
description: |-
API calls to Cilium Agent API to create or update Endpoints are failing on pod {{$labels.pod}} ({{$labels.method}} {{$labels.return_code}}).
This may cause problems for Pod scheduling
summary: API calls to Cilium Agent API to create or update Endpoints are failing.
expr: sum(rate(cilium_k8s_client_api_calls_total{method=~"(PUT|POST|PATCH)", endpoint="endpoint",return_code!~"2[0-9][0-9]"}[5m]))
by (pod, method, return_code)
for: 5m
labels:
severity: warning
- alert: CiliumAgentContainerNetworkInterfaceApiErrorEndpointCreate
annotations:
description: |-
Cilium Endpoint API endpoint rate limiter on Pod {{$labels.pod}} is reporting errors while doing endpoint create.
This may cause CNI errors and prevent Pods from being scheduled.
summary: Cilium Endpoint API endpoint rate limiter is reporting errors while
doing endpoint create.
expr: sum(rate(cilium_api_limiter_processed_requests_total{api_call=~"endpoint-create",
outcome="fail"}[1m])) by (pod, api_call)
for: 5m
labels:
severity: info
- alert: CiliumAgentApiEndpointErrors
annotations:
description: |-
API calls to Cilium Endpoints API on Agent Pod {{$labels.pod}} are failing due to server errors ({{$labels.return_code}}).
This could indicate issues with Cilium's ability to create endpoints which can result in failure to schedule Kubernetes Pods.
summary: API calls to Cilium Endpoints API are failing due to server errors.
expr: sum(rate(cilium_agent_api_process_time_seconds_count{return_code=~"5[0-9][0-9]",
path="/v1/endpoint"}[5m])) by (pod, return_code)
for: 5m
labels:
severity: warning
- name: Cilium IPAM
rules:
- alert: CiliumOperatorExhaustedIpamIps
annotations:
description: |-
Cilium Operator {{$labels.pod}} has exhausted its IPAM IPs. This is a critical issue which may cause Pods to fail to be scheduled.
This may be caused by the number of Pods being scheduled exceeding your cloud platform's network limits, or by issues with Cilium rate limiting.
summary: Cilium Operator has exhausted its IPAM IPs.
expr: sum(cilium_operator_ipam_ips{type="available"}) by () <= 0
for: 5m
labels:
severity: critical
- alert: CiliumOperatorLowAvailableIpamIps
annotations:
description: |-
Cilium Operator {{$labels.pod}} has used up over 90% of its available IPs. If available IPs become exhausted then the operator may not be able to schedule Pods.
This may be caused by the number of Pods being scheduled exceeding your cloud platform's network limits, or by issues with Cilium rate limiting.
summary: Cilium Operator has used up over 90% of its available IPs.
expr: (sum(cilium_operator_ipam_ips{type!="available"}) by () / sum(cilium_operator_ipam_ips)
by ()) > 0.9
for: 5m
labels:
severity: warning
- alert: CiliumOperatorEniIpamErrors
annotations:
description: |-
Cilium Operator {{$labels.pod}} has high error rate while trying to create/attach ENIs for IPAM.
This may be caused by exceeding Node instance ENI/Address limits, as well as errors with the Cilium Operator's cloud configuration.
summary: Cilium Operator has high error rate while trying to create/attach ENIs
for IPAM.
expr: sum(rate(cilium_operator_ipam_interface_creation_ops{status=~"unable to
(create|attach) ENI"}[5m])) by () / count(rate(cilium_operator_ipam_interface_creation_ops{status=~"unable
to (create|attach) ENI"}[5m])) by () > 0.0
for: 10m
labels:
severity: critical
- name: Cilium Maps
rules:
- alert: CiliumAgentMapOperationFailures
annotations:
description: Cilium Agent {{$labels.pod}} is experiencing errors updating BPF
maps on Agent Pod {{$labels.pod}}. Effects may vary depending on map type(s)
being affected however this is likely to cause issues with Cilium.
summary: Cilium Agent is experiencing errors updating BPF maps on Agent Pod.
expr: sum(rate(cilium_bpf_map_ops_total{k8s_app="cilium", outcome="fail"}[5m]))
by (map_name, pod) > 0
for: 5m
labels:
severity: warning
- alert: CiliumAgentBpfMapPressure
annotations:
description: Map {{$labels.map_name}} on Cilium Agent Pod is currently experiencing
high map pressure. The map is currently over 90% full. Full maps will begin
to experience errors on updates which may result in unexpected behaviour.
summary: Map on Cilium Agent Pod is currently experiencing high map pressure.
expr: cilium_bpf_map_pressure{} > 0.9
for: 5m
labels:
severity: warning
- name: Cilium NAT
rules:
- alert: CiliumAgentNatTableFull
annotations:
description: |-
Cilium Agent Pod {{$labels.pod}} is dropping packets due to "No mapping for NAT masquerade" errors. This likely means that the Cilium agent's NAT table is full.
This is a potentially critical issue that can lead to connection issues for packets leaving the cluster network.
See: https://docs.cilium.io/en/v1.9/concepts/networking/masquerading/ for more info.
summary: Cilium Agent Pod is dropping packets due to "No mapping for NAT masquerade"
errors.
expr: sum(rate(cilium_drop_count_total{reason="No mapping for NAT masquerade"}[1m]))
by (pod) > 0
for: 5m
labels:
severity: critical
- name: Cilium API
rules:
- alert: CiliumAgentApiHighErrorRate
annotations:
description: 'Cilium Agent API on Pod {{$labels.pod}} is experiencing a high
error rate for response code: {{$labels.return_code}} on endpoint {{$labels.endpoint}}.'
summary: Cilium Agent API on Pod is experiencing a high error rate.
expr: sum(rate(cilium_k8s_client_api_calls_total{endpoint!="metrics",return_code!~"2[0-9][0-9]"}[5m]))
by (pod, endpoint, return_code)
for: 5m
labels:
severity: info
- name: Cilium Conntrack
rules:
- alert: CiliumAgentConntrackTableFull
annotations:
description: |-
Cilium's conntrack map is failing on new insertions on agent Pod {{$labels.pod}}, this likely means that the conntrack BPF map is full. This is a potentially critical issue and may result in unexpected packet drops.
If this is firing, it is recommended to look at the CPU/memory resource utilization dashboards, as well as the conntrack GC run dashboards, for more details on what the issue is.
summary: Cilium's conntrack map is failing on new insertions on Agent Pod.
expr: 'sum(rate(cilium_drop_count_total{reason="CT: Map insertion failed"}[5m]))
by (pod) > 0'
for: 5m
labels:
severity: critical
- alert: CiliumAgentConnTrackFailedGarbageCollectorRuns
annotations:
description: |-
Cilium Agent Conntrack GC runs on Agent Pod {{$labels.pod}} have been reported as not completing. Runs reported "uncompleted" may indicate a problem with ConnTrack GC.
Cilium failing to GC its ConnTrack table may cause further ConnTrack issues later. This may result in dropped packets or other issues.
summary: Cilium Agent Conntrack GC runs are failing on Agent Pod.
expr: sum(rate(cilium_datapath_conntrack_gc_runs_total{status="uncompleted"}[5m]))
by (pod) > 0
for: 5m
labels:
severity: warning
- name: Cilium Drops
rules:
- alert: CiliumAgentHighDeniedRate
annotations:
description: Cilium Agent Pod {{$labels.pod}} is experiencing a high drop rate
due to policy rule denies. This could mean that a network policy is not configured
correctly, or that a Pod is sending unexpected network traffic
summary: Cilium Agent is experiencing a high drop rate due to policy rule denies.
expr: sum(rate(cilium_drop_count_total{reason="Policy denied"}[1m])) by (reason,
pod) > 0
for: 10m
labels:
severity: info
- name: Cilium Policy
rules:
- alert: CiliumAgentPolicyMapPressure
annotations:
description: 'Cilium Agent {{$labels.pod}} is experiencing high BPF map pressure
(over 90% full) on policy map: {{$labels.map_name}}. This means that the map
is running low on capacity. A full policy map may result in packet drops.'
summary: Cilium Agent is experiencing high BPF map pressure.
expr: sum(cilium_bpf_map_pressure{map_name=~"cilium_policy_.*"}) by (pod) > 0.9
for: 5m
labels:
severity: warning
- name: Cilium Identity
rules:
- alert: CiliumNodeLocalHighIdentityAllocation
annotations:
description: |-
Cilium agent Pod {{$labels.pod}} is using a very high percent (over 80%) of its maximum per-node identity limit (65535).
If this capacity is exhausted Cilium may be unable to allocate new identities. Very high identity allocations can also indicate other problems
summary: Cilium is using a very high percent (over 80%) of its maximum per-node
identity limit (65535).
expr: (sum(cilium_identity{type="node_local"}) by (pod) / (2^16-1)) > 0.8
for: 5m
labels:
severity: warning
- alert: RunningOutOfCiliumClusterIdentities
annotations:
description: Cilium is using a very high percent of its maximum cluster identity
limit ({{ $value | humanizePercentage }} of 65280). If this capacity is exhausted Cilium may be unable
to allocate new identities. Very high identity allocations can also indicate
other problems.
summary: Cilium is using a very high percent of its maximum cluster identity
limit (65280).
expr: sum(cilium_identity{type="cluster_local"}) by () / (2^16-256) > .8
for: 5m
labels:
severity: warning
- name: Cilium Nodes
rules:
- alert: CiliumUnreachableNodes
annotations:
description: Cilium Agent {{$labels.pod}} is reporting unreachable Nodes in
the cluster.
summary: Cilium Agent is reporting unreachable Nodes in the cluster.
expr: sum(cilium_unreachable_nodes{}) by (pod) > 0
for: 15m
labels:
severity: info

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,657 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 16611,
"graphTooltip": 1,
"id": 3,
"iteration": 1664184399070,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-overview"
],
"targetBlank": false,
"title": "Cilium Overviews",
"tooltip": "",
"type": "dashboards",
"url": ""
},
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-agent"
],
"targetBlank": false,
"title": "Cilium Components",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Max per node processingTime"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#e24d42",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Max per node upstreamTime"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#58140c",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "avg(cilium_policy_l7_parse_errors_total{pod=~\"cilium.*\"})"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#bf1b00",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "parse errors"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#bf1b00",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Max per node processingTime"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Max per node upstreamTime"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
},
{
"matcher": {
"id": "byName",
"options": "avg(cilium_policy_l7_parse_errors_total{pod=~\"cilium.*\"})"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
},
{
"matcher": {
"id": "byName",
"options": "parse errors"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 10,
"x": 0,
"y": 0
},
"id": 94,
"links": [ ],
"options": {
"legend": {
"calcs": [
"mean"
],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "quantile(0.95, rate(cilium_proxy_upstream_reply_seconds_sum{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval]) / rate(cilium_proxy_upstream_reply_seconds_count{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (scope)",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{scope}}",
"range": true,
"refId": "A"
}
],
"title": "P95 Proxy Response Time",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Max per node processingTime"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#e24d42",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Max per node upstreamTime"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#58140c",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "avg(cilium_policy_l7_parse_errors_total{pod=~\"cilium.*\"})"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#bf1b00",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "parse errors"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#bf1b00",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Max per node processingTime"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Max per node upstreamTime"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
},
{
"matcher": {
"id": "byName",
"options": "avg(cilium_policy_l7_parse_errors_total{pod=~\"cilium.*\"})"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
},
{
"matcher": {
"id": "byName",
"options": "parse errors"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 10,
"x": 10,
"y": 0
},
"id": 249,
"links": [ ],
"options": {
"legend": {
"calcs": [
"mean"
],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "avg(rate(cilium_proxy_upstream_reply_seconds_sum{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval]) / rate(cilium_proxy_upstream_reply_seconds_count{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (scope)",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{scope}}",
"range": true,
"refId": "A"
}
],
"title": "Avg Proxy Response Time",
"type": "timeseries"
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": [
"cilium-agent"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "default",
"value": "default"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version, cluster)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "cluster",
"options": [ ],
"query": {
"query": "label_values(cilium_version, cluster)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "namespace",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": "cilium.*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"hide": 0,
"includeAll": true,
"multi": false,
"name": "pod",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"refId": "StandardVariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": false,
"text": "10",
"value": "10"
},
"hide": 0,
"label": "top k",
"name": "k",
"options": [
{
"selected": true,
"text": "10",
"value": "10"
}
],
"query": "10",
"skipUrlSync": false,
"type": "textbox"
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Cilium / Components / API",
"uid": "integrations_cilium_ent_api",
"version": 3,
"weekStart": ""
}

View file

@ -0,0 +1,792 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 16611,
"graphTooltip": 1,
"id": 3,
"iteration": 1664184399070,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-overview"
],
"targetBlank": false,
"title": "Cilium Overviews",
"tooltip": "",
"type": "dashboards",
"url": ""
},
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-agent"
],
"targetBlank": false,
"title": "Cilium Components",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 0
},
"id": 262,
"links": [ ],
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (map_name)",
"hide": false,
"legendFormat": "__auto",
"range": true,
"refId": "B"
}
],
"title": "BPF Map Operations",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": [ ]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 8
},
"id": 286,
"links": [ ],
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\", operation=\"delete\"}[$__rate_interval])) by (map_name)",
"hide": false,
"legendFormat": "__auto",
"range": true,
"refId": "B"
}
],
"title": "BPF Map Deletes",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": [ ]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 8
},
"id": 285,
"links": [ ],
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\", operation=\"update\"}[$__rate_interval])) by (map_name)",
"hide": false,
"legendFormat": "__auto",
"range": true,
"refId": "B"
}
],
"title": "BPF Map Updates",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
},
"unit": "Errors/Minute"
},
"overrides": [ ]
},
"gridPos": {
"h": 7,
"w": 6,
"x": 0,
"y": 15
},
"id": 244,
"links": [ ],
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "9.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\", outcome=\"fail\"}[$__rate_interval])) * 60",
"hide": false,
"legendFormat": "{{outcome}}",
"range": true,
"refId": "B"
}
],
"title": "BPF Map Operation Error Rate",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": [ ]
},
"gridPos": {
"h": 7,
"w": 11,
"x": 6,
"y": 15
},
"id": 287,
"links": [ ],
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "9.1.5-0100a6a",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "topk($k, sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\", outcome=\"fail\"}[$__rate_interval])) by (endpoint, pod, operation))",
"hide": false,
"legendFormat": "{{pod}} {{operation}} {{endpoint}}",
"range": true,
"refId": "B"
}
],
"title": "Top BPF Map Operation Failures",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": [ ]
},
"gridPos": {
"h": 7,
"w": 7,
"x": 17,
"y": 15
},
"id": 243,
"links": [ ],
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(rate(cilium_bpf_map_ops_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (operation, outcome)",
"hide": false,
"legendFormat": "{{operation}}: {{outcome}}",
"range": true,
"refId": "B"
}
],
"title": "Map Operation Outcomes",
"type": "timeseries"
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": [
"cilium-agent"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "default",
"value": "default"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version, cluster)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "cluster",
"options": [ ],
"query": {
"query": "label_values(cilium_version, cluster)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "namespace",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": "cilium.*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"hide": 0,
"includeAll": true,
"multi": false,
"name": "pod",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"refId": "StandardVariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": false,
"text": "10",
"value": "10"
},
"hide": 0,
"label": "top k",
"name": "k",
"options": [
{
"selected": true,
"text": "10",
"value": "10"
}
],
"query": "10",
"skipUrlSync": false,
"type": "textbox"
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Cilium / Components / BPF",
"uid": "integrations_cilium_ent_bpf",
"version": 3,
"weekStart": ""
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,939 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 16611,
"graphTooltip": 1,
"id": 3,
"iteration": 1664184399070,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-overview"
],
"targetBlank": false,
"title": "Cilium Overviews",
"tooltip": "",
"type": "dashboards",
"url": ""
},
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-agent"
],
"targetBlank": false,
"title": "Cilium Components",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "dump_interrupts conntrack ipv4"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#ea6460",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "dump_interrupts conntrack ipv6"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#58140c",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 79,
"links": [ ],
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "topk($k, sum(rate(cilium_datapath_conntrack_dump_resets_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (area, family, name, pod))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}} {{name}} {{area}} {{family}}",
"range": true,
"refId": "A"
}
],
"title": "Datapath Conntrack Dump Resets",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": -1,
"drawStyle": "line",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 0,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "disconnecting"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#614d93",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "ready"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "rgba(81, 220, 95, 0.52)",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "waiting-to-regenerate"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#0a50a1",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 51,
"links": [ ],
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(cilium_endpoint_state{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}) by (endpoint_state)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{endpoint_state}}",
"range": true,
"refId": "A"
}
],
"title": "Cilium Endpoint State",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": [ ]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 8
},
"id": 106,
"links": [ ],
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(rate(cilium_services_events_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod, action)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{action}}",
"range": true,
"refId": "A"
}
],
"title": "Service Updates",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "purple",
"value": null
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 8
},
"id": 33,
"links": [ ],
"options": {
"displayMode": "gradient",
"minVizHeight": 10,
"minVizWidth": 0,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": true
},
"pluginVersion": "9.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(cilium_policy_endpoint_enforcement_status{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}) by (enforcement)",
"format": "time_series",
"hide": false,
"instant": true,
"interval": "1s",
"intervalFactor": 1,
"legendFormat": "{{enforcement}}",
"refId": "B"
}
],
"title": "Endpoints Policy Enforcement Status",
"type": "bargauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "opm"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "fail"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#bf1b00",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "fail/min"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#890f02",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "success"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#447ebc",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "success/min"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#3f6833",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 17
},
"id": 49,
"links": [ ],
"options": {
"legend": {
"calcs": [
"mean",
"max"
],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(rate(cilium_endpoint_regenerations_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by(outcome)",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"legendFormat": "{{outcome}}",
"refId": "A"
}
],
"title": "Endpoint Regenerations",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": [ ]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 17
},
"id": 55,
"links": [ ],
"options": {
"legend": {
"calcs": [
"mean"
],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "quantile(0.95, rate(cilium_endpoint_regeneration_time_stats_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval]) / rate(cilium_endpoint_regeneration_time_stats_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"hide": false,
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"range": true,
"refId": "A"
}
],
"title": "P95 Endpoint Regeneration Time",
"type": "timeseries"
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": [
"cilium-agent"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "default",
"value": "default"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version, cluster)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "cluster",
"options": [ ],
"query": {
"query": "label_values(cilium_version, cluster)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "namespace",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": "cilium.*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"hide": 0,
"includeAll": true,
"multi": false,
"name": "pod",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"refId": "StandardVariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": false,
"text": "10",
"value": "10"
},
"hide": 0,
"label": "top k",
"name": "k",
"options": [
{
"selected": true,
"text": "10",
"value": "10"
}
],
"query": "10",
"skipUrlSync": false,
"type": "textbox"
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Cilium / Components / Datapath",
"uid": "integrations_cilium_ent_datapath",
"version": 3,
"weekStart": ""
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,417 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 16611,
"graphTooltip": 1,
"id": 3,
"iteration": 1664184399070,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-overview"
],
"targetBlank": false,
"title": "Cilium Overviews",
"tooltip": "",
"type": "dashboards",
"url": ""
},
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-agent"
],
"targetBlank": false,
"title": "Cilium Components",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 53,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "Identities"
},
"overrides": [ ]
},
"gridPos": {
"h": 11,
"w": 11,
"x": 0,
"y": 0
},
"id": 259,
"links": [ ],
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "9.1.5-0100a6a",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(cilium_identity{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}) by (type)",
"hide": false,
"legendFormat": "{{type}}",
"range": true,
"refId": "B"
}
],
"title": "Allocated Identities",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "Number of identities allocated by type.\n\nLimits for identity allocations are:\n\nMax(cluster_local)=65280\nMax(node_local)=65535.\n\nRunning out of identities is a potentially critical issue.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 60000
},
{
"color": "red",
"value": 65280
}
]
},
"unit": "Identities"
},
"overrides": [ ]
},
"gridPos": {
"h": 11,
"w": 13,
"x": 11,
"y": 0
},
"id": 304,
"links": [ ],
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "9.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(cilium_identity{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}) by (type)",
"hide": false,
"legendFormat": "{{type}}",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(cilium_identity{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"})",
"hide": false,
"legendFormat": "total",
"range": true,
"refId": "A"
}
],
"title": "Allocated Identities",
"type": "stat"
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": [
"cilium-agent"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "default",
"value": "default"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version, cluster)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "cluster",
"options": [ ],
"query": {
"query": "label_values(cilium_version, cluster)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "namespace",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": "cilium.*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"hide": 0,
"includeAll": true,
"multi": false,
"name": "pod",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"refId": "StandardVariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": false,
"text": "10",
"value": "10"
},
"hide": 0,
"label": "top k",
"name": "k",
"options": [
{
"selected": true,
"text": "10",
"value": "10"
}
],
"query": "10",
"skipUrlSync": false,
"type": "textbox"
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Cilium / Components / Identities",
"uid": "integrations_cilium_ent_identities",
"version": 3,
"weekStart": ""
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,433 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 16611,
"graphTooltip": 1,
"id": 3,
"iteration": 1664184399070,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-overview"
],
"targetBlank": false,
"title": "Cilium Overviews",
"tooltip": "",
"type": "dashboards",
"url": ""
},
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-agent"
],
"targetBlank": false,
"title": "Cilium Components",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "ipv4"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#5195ce",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "ipv6"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#6d1f62",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 0
},
"id": 87,
"links": [ ],
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "9.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(cilium_ip_addresses{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}) by (family)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{family}}",
"range": true,
"refId": "A"
}
],
"title": "Allocated Addresses",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 0
},
"id": 89,
"links": [ ],
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "topk($k, sum(cilium_unreachable_health_endpoints{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}) by (pod))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"range": true,
"refId": "B"
}
],
"title": "Reported Unreachable Health Endpoints",
"type": "timeseries"
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": [
"cilium-agent"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "default",
"value": "default"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version, cluster)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "cluster",
"options": [ ],
"query": {
"query": "label_values(cilium_version, cluster)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "namespace",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": "cilium.*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"hide": 0,
"includeAll": true,
"multi": false,
"name": "pod",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"refId": "StandardVariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": false,
"text": "10",
"value": "10"
},
"hide": 0,
"label": "top k",
"name": "k",
"options": [
{
"selected": true,
"text": "10",
"value": "10"
}
],
"query": "10",
"skipUrlSync": false,
"type": "textbox"
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Cilium / Components / Network",
"uid": "integrations_cilium_ent_network",
"version": 3,
"weekStart": ""
}

View file

@ -0,0 +1,586 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Dashboard for Cilium v1.12 (https://cilium.io/) Agent metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 16611,
"graphTooltip": 1,
"id": 3,
"iteration": 1664184399070,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-overview"
],
"targetBlank": false,
"title": "Cilium Overviews",
"tooltip": "",
"type": "dashboards",
"url": ""
},
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"cilium-agent"
],
"targetBlank": false,
"title": "Cilium Components",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 226,
"panels": [ ],
"title": "Cilium Nodes",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 35,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ops"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Avg"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "#cca300",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Max"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "rgb(167, 150, 111)",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Max"
},
"properties": [
{
"id": "custom.fillBelowTo",
"value": "Min"
},
{
"id": "custom.lineWidth",
"value": 0
}
]
},
{
"matcher": {
"id": "byName",
"options": "Min"
},
"properties": [
{
"id": "custom.lineWidth",
"value": 0
}
]
},
{
"matcher": {
"id": "byName",
"options": "add k8s"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
},
{
"matcher": {
"id": "byName",
"options": "delete k8s"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
},
{
"matcher": {
"id": "byName",
"options": "update k8s"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
},
{
"matcher": {
"id": "byName",
"options": "add local-node"
},
"properties": [
{
"id": "unit",
"value": "short"
}
]
}
]
},
"gridPos": {
"h": 12,
"w": 18,
"x": 0,
"y": 1
},
"id": 93,
"links": [ ],
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.1.3-e1f2f3c",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "avg(rate(cilium_nodes_all_events_received_total{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (event_type, source)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{event_type}} {{source}}",
"range": true,
"refId": "B"
}
],
"title": "Node Events",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": null
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 6,
"x": 18,
"y": 1
},
"id": 91,
"links": [ ],
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "9.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(cilium_nodes_all_num{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Nodes",
"range": true,
"refId": "A"
}
],
"title": "Cilium Nodes",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"links": [ ],
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
},
"unit": "short"
},
"overrides": [ ]
},
"gridPos": {
"h": 6,
"w": 6,
"x": 18,
"y": 7
},
"id": 218,
"links": [ ],
"options": {
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "9.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum(cilium_unreachable_nodes{k8s_app=\"cilium\", cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Unreachable nodes",
"range": true,
"refId": "A"
}
],
"title": "Unreachable Cilium Nodes",
"type": "gauge"
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": [
"cilium-agent"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "default",
"value": "default"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version, cluster)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "cluster",
"options": [ ],
"query": {
"query": "label_values(cilium_version, cluster)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"current": {
"selected": false,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "namespace",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\"}, namespace)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": "cilium.*",
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"hide": 0,
"includeAll": true,
"multi": false,
"name": "pod",
"options": [ ],
"query": {
"query": "label_values(cilium_version{cluster=~\"$cluster\", namespace=~\"$namespace\"}, pod)",
"refId": "StandardVariableQuery"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": false,
"text": "10",
"value": "10"
},
"hide": 0,
"label": "top k",
"name": "k",
"options": [
{
"selected": true,
"text": "10",
"value": "10"
}
],
"query": "10",
"skipUrlSync": false,
"type": "textbox"
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Cilium / Components / Nodes",
"uid": "integrations_cilium_ent_nodes",
"version": 3,
"weekStart": ""
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
null

View file

@ -0,0 +1,44 @@
groups:
# Alerting rules for ClickHouse servers scraped through the ClickHouse
# Prometheus endpoint. Every rule must hold for 5 minutes before it fires and
# keeps firing for a further 5 minutes after the condition clears
# (`keep_firing_for`) to avoid flapping.
- name: ClickHouseAlerts
  rules:
  # Fires when the reported maximum replication queue size stays above 99.
  - alert: ClickHouseReplicationQueueBackingUp
    annotations:
      description: |
        ClickHouse replication tasks are processing slower than expected on {{ $labels.instance }} causing replication queue size to back up at {{ $value }} exceeding the threshold value of 99.
      summary: ClickHouse replica max queue size backing up.
    expr: |
      ClickHouseAsyncMetrics_ReplicasMaxQueueSize > 99
    for: 5m
    keep_firing_for: 5m
    labels:
      severity: warning
  # Fires when more than one insert was rejected within the last 5 minutes.
  # ClickHouseProfileEvents_* series are cumulative counters, so the rule
  # looks at the increase over the window rather than the raw value; comparing
  # the raw counter would keep the alert firing until the server restarts.
  - alert: ClickHouseRejectedInserts
    annotations:
      description: ClickHouse inserts are being rejected on {{ $labels.instance }}
        as items are being inserted faster than ClickHouse is able to merge them.
      summary: ClickHouse has too many rejected inserts.
    expr: increase(ClickHouseProfileEvents_RejectedInserts[5m]) > 1
    for: 5m
    keep_firing_for: 5m
    labels:
      severity: critical
  # Fires when an instance reports more than one open ZooKeeper session.
  - alert: ClickHouseZookeeperSessions
    annotations:
      description: |
        ClickHouse has more than one connection to a Zookeeper on {{ $labels.instance }} which can lead to bugs due to stale reads in Zookeeper's consistency model.
      summary: ClickHouse has too many Zookeeper sessions.
    expr: ClickHouseMetrics_ZooKeeperSession > 1
    for: 5m
    keep_firing_for: 5m
    labels:
      severity: critical
  # Fires when an instance reports at least one replica in read-only state.
  - alert: ClickHouseReplicasInReadOnly
    annotations:
      description: |
        ClickHouse has replicas in a read only state on {{ $labels.instance }} after losing connection to Zookeeper or at startup.
      summary: ClickHouse has too many replicas in read only state.
    expr: ClickHouseMetrics_ReadonlyReplica > 0
    for: 5m
    keep_firing_for: 5m
    labels:
      severity: critical

View file

@ -0,0 +1,616 @@
{
"__inputs": [ ],
"__requires": [ ],
"annotations": {
"list": [ ]
},
"editable": false,
"gnetId": null,
"graphTooltip": 1,
"hideControls": false,
"id": null,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"clickhouse-mixin"
],
"targetBlank": false,
"title": "Other ClickHouse dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Time spent waiting for read syscall",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "µs"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"editorMode": "builder",
"expr": "increase(ClickHouseProfileEvents_DiskReadElapsedMicroseconds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"legendFormat": "{{ instance }} - disk read elapsed",
"range": true,
"refId": "A"
}
],
"title": "Disk read latency",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Time spent waiting for write syscall",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "µs"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 3,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"editorMode": "builder",
"expr": "increase(ClickHouseProfileEvents_DiskWriteElapsedMicroseconds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"legendFormat": "{{ instance }} - disk write elapsed",
"range": true,
"refId": "A"
}
],
"title": "Disk write latency",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Latency of inbound network traffic",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "µs"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 4,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"editorMode": "builder",
"expr": "increase(ClickHouseProfileEvents_NetworkReceiveElapsedMicroseconds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"legendFormat": "{{ instance }} - network receive elapsed",
"range": true,
"refId": "A"
}
],
"title": "Network receive latency",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Latency of outbound network traffic",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "µs"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 5,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"editorMode": "builder",
"expr": "increase(ClickHouseProfileEvents_NetworkSendElapsedMicroseconds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"legendFormat": "{{ instance }} - network send elapsed",
"range": true,
"refId": "A"
}
],
"title": "Network transmit latency",
"type": "timeseries"
},
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"description": "Time spent waiting for ZooKeeper request to process",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [ ],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "µs"
},
"overrides": [ ]
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 16
},
"id": 6,
"options": {
"legend": {
"calcs": [ ],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"uid": "${prometheus_datasource}"
},
"editorMode": "builder",
"expr": "increase(ClickHouseProfileEvents_ZooKeeperWaitMicroseconds{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"legendFormat": "{{ instance }} - ZooKeeper wait",
"range": true,
"refId": "A"
}
],
"title": "ZooKeeper wait time",
"type": "timeseries"
}
],
"refresh": "1m",
"rows": [ ],
"schemaVersion": 14,
"style": "dark",
"tags": [
"clickhouse-mixin"
],
"templating": {
"list": [
{
"current": { },
"hide": 0,
"label": "Data source",
"name": "prometheus_datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": ".+",
"current": {
"text": "",
"value": ""
},
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(ClickHouseProfileEvents_DiskReadElapsedMicroseconds,job)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"text": "",
"value": ""
},
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 0,
"includeAll": true,
"label": "instance",
"multi": false,
"name": "instance",
"options": [ ],
"query": "label_values(ClickHouseProfileEvents_DiskReadElapsedMicroseconds{job=~\"$job\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": "",
"current": { },
"datasource": {
"uid": "${prometheus_datasource}"
},
"hide": 2,
"includeAll": true,
"label": "Cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ClickHouseProfileEvents_DiskReadElapsedMicroseconds{job=~\"$job\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "default",
"title": "ClickHouse latency",
"uid": "clickhouse-latency",
"version": 0
}

View file

@ -0,0 +1,295 @@
{
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"clickhouse-mixin"
],
"targetBlank": false,
"title": "Other ClickHouse dashboards",
"type": "dashboards",
"url": ""
}
],
"panels": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Logs volume grouped by \"level\" label.",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "bars",
"fillOpacity": 50,
"stacking": {
"mode": "normal"
}
},
"unit": "none"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "(E|e)merg|(F|f)atal|(A|a)lert|(C|c)rit.*"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "purple",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(E|e)(rr.*|RR.*)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(W|w)(arn.*|ARN.*|rn|RN)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(N|n)(otice|ote)|(I|i)(nf.*|NF.*)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "dbg.*|DBG.*|(D|d)(EBUG|ebug)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "(T|t)(race|RACE)"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "light-blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byRegexp",
"options": "logs"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "text",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 6,
"w": 24
},
"id": 1,
"interval": "30s",
"options": {
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "v10.0.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "sum by (level) (count_over_time({job=~\".*/clickhouse.*\",job=~\"$job\",instance=~\"$instance\",level=~\"$level\"}\n|~ \"$regex_search\"\n\n[$__interval]))\n",
"legendFormat": "{{ level }}"
}
],
"title": "Logs volume",
"transformations": [
{
"id": "renameByRegex",
"options": {
"regex": "Value",
"renamePattern": "logs"
}
}
],
"type": "timeseries"
},
{
"datasource": {
"type": "datasource",
"uid": "-- Mixed --"
},
"gridPos": {
"h": 18,
"w": 24
},
"id": 2,
"options": {
"dedupStrategy": "exact",
"enableLogDetails": true,
"prettifyLogMessage": true,
"showTime": false,
"wrapLogMessage": true
},
"pluginVersion": "v10.0.0",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"expr": "{job=~\".*/clickhouse.*\",job=~\"$job\",instance=~\"$instance\",level=~\"$level\"} \n|~ \"$regex_search\"\n\n\n"
}
],
"title": "Logs",
"type": "logs"
}
],
"refresh": "1m",
"schemaVersion": 36,
"tags": [
"clickhouse-mixin"
],
"templating": {
"list": [
{
"label": "Loki data source",
"name": "loki_datasource",
"query": "loki",
"regex": "",
"type": "datasource"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Job",
"multi": true,
"name": "job",
"query": "label_values({job=~\".*/clickhouse.*\"}, job)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Instance",
"multi": true,
"name": "instance",
"query": "label_values({job=~\".*/clickhouse.*\",job=~\"$job\"}, instance)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"allValue": ".*",
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"includeAll": true,
"label": "Level",
"multi": true,
"name": "level",
"query": "label_values({job=~\".*/clickhouse.*\",job=~\"$job\",instance=~\"$instance\"}, level)",
"refresh": 2,
"sort": 1,
"type": "query"
},
{
"current": {
"selected": false,
"text": "",
"value": ""
},
"label": "Regex search",
"name": "regex_search",
"options": [
{
"selected": true,
"text": "",
"value": ""
}
],
"query": "",
"type": "textbox"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timezone": "utc",
"title": "ClickHouse logs",
"uid": "clickhouse-logs-overview"
}

Some files were not shown because too many files have changed in this diff Show more