mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2024-12-14 11:58:02 +00:00
feat: expose fiber responsiveness metrics (#2125)
Should allow tracking cases where Dragonfly is not responsive to I/O due to big CPU tasks. Also, update the local Grafana dashboard. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
parent
b9781c4903
commit
c7db025a48
5 changed files with 143 additions and 21 deletions
2
helio
2
helio
|
@ -1 +1 @@
|
|||
Subproject commit fe7ec28642c1b699bdc8839296f354d797ee0365
|
||||
Subproject commit 1fea6effc72919649c815afb04e9c7829b0240ab
|
|
@ -850,7 +850,7 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
|
|||
AppendMetricValue("role", 1, {"role"}, {m.is_master ? "master" : "replica"}, &resp->body());
|
||||
AppendMetricWithoutLabels("master", "1 if master 0 if replica", m.is_master ? 1 : 0,
|
||||
MetricType::GAUGE, &resp->body());
|
||||
AppendMetricWithoutLabels("uptime_in_seconds", "", m.uptime, MetricType::GAUGE, &resp->body());
|
||||
AppendMetricWithoutLabels("uptime_in_seconds", "", m.uptime, MetricType::COUNTER, &resp->body());
|
||||
|
||||
// Clients metrics
|
||||
AppendMetricWithoutLabels("connected_clients", "", m.conn_stats.num_conns, MetricType::GAUGE,
|
||||
|
@ -923,7 +923,7 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
|
|||
&command_metrics);
|
||||
for (const auto& [name, stat] : m.cmd_stats_map) {
|
||||
const auto calls = stat.first;
|
||||
const auto duration_seconds = stat.second * 0.001;
|
||||
const double duration_seconds = stat.second * 0.001;
|
||||
AppendMetricValue("commands_total", calls, {"cmd"}, {name}, &command_metrics);
|
||||
AppendMetricValue("commands_duration_seconds_total", duration_seconds, {"cmd"}, {name},
|
||||
&command_metrics);
|
||||
|
@ -944,6 +944,18 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
|
|||
absl::StrAppend(&resp->body(), replication_lag_metrics);
|
||||
}
|
||||
|
||||
AppendMetricWithoutLabels("fiber_switch_total", "", m.fiber_switch_cnt, MetricType::COUNTER,
|
||||
&resp->body());
|
||||
double delay_seconds = m.fiber_switch_delay_ns * 1e-9;
|
||||
AppendMetricWithoutLabels("fiber_switch_delay_seconds_total", "", delay_seconds,
|
||||
MetricType::COUNTER, &resp->body());
|
||||
|
||||
AppendMetricWithoutLabels("fiber_longrun_total", "", m.fiber_longrun_cnt, MetricType::COUNTER,
|
||||
&resp->body());
|
||||
double longrun_seconds = m.fiber_longrun_ns * 1e-9;
|
||||
AppendMetricWithoutLabels("fiber_longrun_seconds_total", "", longrun_seconds, MetricType::COUNTER,
|
||||
&resp->body());
|
||||
|
||||
absl::StrAppend(&resp->body(), db_key_metrics);
|
||||
absl::StrAppend(&resp->body(), db_key_expire_metrics);
|
||||
}
|
||||
|
@ -1402,6 +1414,11 @@ Metrics ServerFamily::GetMetrics() const {
|
|||
|
||||
lock_guard lk(mu);
|
||||
|
||||
result.fiber_switch_cnt += fb2::FiberSwitchEpoch();
|
||||
result.fiber_switch_delay_ns += fb2::FiberSwitchDelay();
|
||||
result.fiber_longrun_cnt += fb2::FiberLongRunCnt();
|
||||
result.fiber_longrun_ns += fb2::FiberLongRunSum();
|
||||
|
||||
result.coordinator_stats += ss->stats;
|
||||
result.conn_stats += ss->connection_stats;
|
||||
|
||||
|
|
|
@ -86,6 +86,12 @@ struct Metrics {
|
|||
size_t small_string_bytes = 0;
|
||||
uint32_t traverse_ttl_per_sec = 0;
|
||||
uint32_t delete_ttl_per_sec = 0;
|
||||
uint64_t fiber_switch_cnt = 0;
|
||||
uint64_t fiber_switch_delay_ns = 0;
|
||||
|
||||
// Statistics about fibers running for a long time (more than 1ms).
|
||||
uint64_t fiber_longrun_cnt = 0;
|
||||
uint64_t fiber_longrun_ns = 0;
|
||||
|
||||
std::map<std::string, std::pair<uint64_t, uint64_t>> cmd_stats_map; // command call frequencies
|
||||
|
||||
|
|
|
@ -105,7 +105,7 @@
|
|||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"pluginVersion": "9.3.6",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -191,7 +191,7 @@
|
|||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"pluginVersion": "9.3.6",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -282,7 +282,7 @@
|
|||
"showThresholdLabels": false,
|
||||
"showThresholdMarkers": true
|
||||
},
|
||||
"pluginVersion": "10.2.0",
|
||||
"pluginVersion": "9.3.6",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
@ -350,7 +350,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.0",
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -456,7 +456,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.0",
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -574,7 +574,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.0",
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -690,7 +690,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.0",
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -791,7 +791,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.0",
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -912,7 +912,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.0",
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1033,7 +1033,7 @@
|
|||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "10.2.0",
|
||||
"pluginVersion": "9.3.6",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
|
@ -1110,7 +1110,6 @@
|
|||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
|
@ -1124,7 +1123,6 @@
|
|||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
|
@ -1155,7 +1153,7 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"unit": "µs"
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
|
@ -1187,7 +1185,107 @@
|
|||
"disableTextWrap": false,
|
||||
"editorMode": "code",
|
||||
"expr":
|
||||
"rate(dragonfly_fiber_switch_delay_seconds_total[$__rate_interval])*1000000/rate(dragonfly_fiber_switch_total[$__rate_interval])",
|
||||
"rate(dragonfly_fiber_switch_delay_seconds_total[$__rate_interval])/rate(dragonfly_fiber_switch_total[$__rate_interval])",
|
||||
"fullMetaSearch": false,
|
||||
"includeNullMetadata": false,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A",
|
||||
"useBackend": false
|
||||
}
|
||||
],
|
||||
"title": "FiberSwitchDelay",
|
||||
"transformations": [],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 30
|
||||
},
|
||||
"id": 20,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "code",
|
||||
"expr":
|
||||
"rate(dragonfly_fiber_longrun_seconds_total[$__rate_interval])/rate(dragonfly_fiber_longrun_total[$__rate_interval])",
|
||||
"fullMetaSearch": false,
|
||||
"includeNullMetadata": false,
|
||||
"instant": false,
|
||||
|
@ -1203,7 +1301,8 @@
|
|||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 38,
|
||||
"schemaVersion": 37,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"prometheus",
|
||||
"dragonfly"
|
||||
|
|
|
@ -29,14 +29,14 @@ scrape_configs:
|
|||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
|
||||
- job_name: dragonfly
|
||||
scrape_interval: 5s
|
||||
scrape_interval: 1s
|
||||
static_configs:
|
||||
- targets: ['host.docker.internal:6379']
|
||||
|
||||
- job_name: 'prometheus'
|
||||
|
||||
# Override the global default scrape interval for this job.
|
||||
scrape_interval: 5s
|
||||
scrape_interval: 1s
|
||||
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
@ -45,7 +45,7 @@ scrape_configs:
|
|||
- job_name: 'node-exporter'
|
||||
|
||||
# Override the global default scrape interval for this job.
|
||||
scrape_interval: 5s
|
||||
scrape_interval: 1s
|
||||
static_configs:
|
||||
- targets: ['node-exporter:9100']
|
||||
labels:
|
||||
|
|
Loading…
Reference in a new issue