From fd74fd5b4bfd7af995f733a22ed4a65aa08cb378 Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Sat, 18 May 2024 22:40:14 +0300 Subject: [PATCH] chore: Export replication memory stats (#3062) --- src/server/dflycmd.cc | 4 ++-- src/server/server_family.cc | 17 +++++++++++++---- src/server/server_family.h | 4 ++-- tools/local/monitoring/docker-compose.yml | 2 +- .../provisioning/dashboards/dashboard.json | 13 +++++++++++++ 5 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/server/dflycmd.cc b/src/server/dflycmd.cc index 09d5bd800..6adc47f36 100644 --- a/src/server/dflycmd.cc +++ b/src/server/dflycmd.cc @@ -719,10 +719,10 @@ void DflyCmd::GetReplicationMemoryStats(ReplicationMemoryStats* stats) const { const auto& flow = info->flows[shard->shard_id()]; if (flow.streamer) - stats->streamer_buf_capacity_bytes_ += flow.streamer->GetTotalBufferCapacities(); + stats->streamer_buf_capacity_bytes += flow.streamer->GetTotalBufferCapacities(); if (flow.saver) - stats->full_sync_buf_bytes_ += flow.saver->GetTotalBuffersSize(); + stats->full_sync_buf_bytes += flow.saver->GetTotalBuffersSize(); } }; shard_set->RunBlockingInParallel(cb); diff --git a/src/server/server_family.cc b/src/server/server_family.cc index dca042051..adbb09b36 100644 --- a/src/server/server_family.cc +++ b/src/server/server_family.cc @@ -1051,7 +1051,7 @@ void AppendMetricWithoutLabels(string_view name, string_view help, const absl::A AppendMetricValue(name, value, {}, {}, dest); } -void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) { +void PrintPrometheusMetrics(const Metrics& m, DflyCmd* dfly_cmd, StringResponse* resp) { // Server metrics AppendMetricHeader("version", "", MetricType::GAUGE, &resp->body()); AppendMetricValue("version", 1, {"version"}, {GetVersion()}, &resp->body()); @@ -1130,6 +1130,15 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) { if (added) absl::StrAppend(&resp->body(), type_used_memory_metric); } + if (!m.replication_metrics.empty()) { + ReplicationMemoryStats repl_mem; + dfly_cmd->GetReplicationMemoryStats(&repl_mem); + AppendMetricWithoutLabels( + "replication_streaming_bytes", "Stable sync replication memory usage", + repl_mem.streamer_buf_capacity_bytes, MetricType::GAUGE, &resp->body()); + AppendMetricWithoutLabels("replication_full_sync_bytes", "Full sync memory usage", + repl_mem.full_sync_buf_bytes, MetricType::GAUGE, &resp->body()); + } // Stats metrics AppendMetricWithoutLabels("connections_received_total", "", conn_stats.conn_received_cnt, @@ -1262,7 +1271,7 @@ void ServerFamily::ConfigureMetrics(util::HttpListenerBase* http_base) { auto cb = [this](const util::http::QueryArgs& args, util::HttpContext* send) { StringResponse resp = util::http::MakeStringResponse(boost::beast::http::status::ok); - PrintPrometheusMetrics(this->GetMetrics(), &resp); + PrintPrometheusMetrics(this->GetMetrics(), this->dfly_cmd_.get(), &resp); return send->Invoke(std::move(resp)); }; @@ -2027,8 +2036,8 @@ void ServerFamily::Info(CmdArgList args, ConnectionContext* cntx) { if (!m.replication_metrics.empty()) { ReplicationMemoryStats repl_mem; dfly_cmd_->GetReplicationMemoryStats(&repl_mem); - append("replication_streaming_buffer_bytes", repl_mem.streamer_buf_capacity_bytes_); - append("replication_full_sync_buffer_bytes", repl_mem.full_sync_buf_bytes_); + append("replication_streaming_buffer_bytes", repl_mem.streamer_buf_capacity_bytes); + append("replication_full_sync_buffer_bytes", repl_mem.full_sync_buf_bytes); } { diff --git a/src/server/server_family.h b/src/server/server_family.h index a064b4afb..2f8a151c8 100644 --- a/src/server/server_family.h +++ b/src/server/server_family.h @@ -62,8 +62,8 @@ struct ReplicaRoleInfo { }; struct ReplicationMemoryStats { - size_t streamer_buf_capacity_bytes_ = 0; // total capacities of streamer buffers - size_t full_sync_buf_bytes_ = 0; // total bytes used for full sync buffers + size_t streamer_buf_capacity_bytes = 0; // total capacities of streamer buffers + size_t full_sync_buf_bytes = 0; // total bytes used for full sync buffers }; // Global peak stats recorded after aggregating metrics over all shards. diff --git a/tools/local/monitoring/docker-compose.yml b/tools/local/monitoring/docker-compose.yml index 7202cff5b..eaf09ec0d 100644 --- a/tools/local/monitoring/docker-compose.yml +++ b/tools/local/monitoring/docker-compose.yml @@ -41,7 +41,7 @@ services: mode: global grafana: - image: grafana/grafana + image: grafana/grafana:10.1.10 user: '472' restart: always environment: diff --git a/tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json b/tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json index 3a7575795..abc86a5f6 100644 --- a/tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json +++ b/tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json @@ -495,6 +495,19 @@ "legendFormat": "max", "refId": "B", "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "dragonfly_used_memory_rss_bytes{instance=~\"$instance\"} ", + "range": true, + "format": "time_series", + "hide": false, + "legendFormat": "RSS", + "refId": "C" } ], "thresholds": [],