feat: Memory stats (#2162)

2024-12-14 11:58:02 +00:00 · 2023-11-13 13:58:29 +02:00 · 2023-11-13 13:58:29 +02:00 · 5ca2be1185
commit 5ca2be1185
parent 4b685aa809
9 changed files with 226 additions and 35 deletions
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 7d206c88fcbed68ce899971cba802ba17b49ceae
+Subproject commit 88514b9270edc194ad9ea018e614e2d1b17b3962
--- a/src/core/size_tracking_channel.h
+++ b/src/core/size_tracking_channel.h
@ -0,0 +1,75 @@
+// Copyright 2023, DragonflyDB authors.  All rights reserved.
+// See LICENSE for licensing terms.
+//
+
+#pragma once
+
+#include <atomic>
+
+#include "core/fibers.h"
+
+namespace dfly {
+
+// Thin wrapper around SimpleChannel that additionally accounts for the memory held by the
+// items currently stored in it.
+// T must expose a size() method returning the number of heap-allocated bytes it owns, i.e.
+// excluding whatever is already covered by sizeof(T). This could be generalized in the future.
+// Note that TryPush() is slightly less efficient than a variadic equivalent would be, since a T
+// has to be constructed even when it ends up not being pushed.
+template <typename T, typename Queue = folly::ProducerConsumerQueue<T>> class SizeTrackingChannel {
+ public:
+  SizeTrackingChannel(size_t n, unsigned num_producers = 1) : queue_(n, num_producers) {
+  }
+
+  // All push variants take a fully built T (rather than variadic constructor args) because
+  // the item's size must be known in order to account for it.
+
+  // Pushes `t`; its size is added to the tracked total before it is handed to the queue.
+  void Push(T t) noexcept {
+    const size_t item_bytes = t.size();
+    size_.fetch_add(item_bytes, std::memory_order_relaxed);
+    queue_.Push(std::move(t));
+  }
+
+  // Tries to push `t`. Returns true, and accounts for its size, only on success.
+  bool TryPush(T t) noexcept {
+    // Must be read before `t` is moved from.
+    const size_t item_bytes = t.size();
+    if (!queue_.TryPush(std::move(t)))
+      return false;
+
+    size_.fetch_add(item_bytes, std::memory_order_relaxed);
+    return true;
+  }
+
+  // Pops an item into `dest`. On success, subtracts its size from the tracked total.
+  bool Pop(T& dest) {
+    if (!queue_.Pop(dest))
+      return false;
+
+    size_.fetch_sub(dest.size(), std::memory_order_relaxed);
+    return true;
+  }
+
+  void StartClosing() {
+    queue_.StartClosing();
+  }
+
+  // Same accounting as Pop(), but goes through the underlying TryPop().
+  bool TryPop(T& dest) {
+    if (!queue_.TryPop(dest))
+      return false;
+
+    size_.fetch_sub(dest.size(), std::memory_order_relaxed);
+    return true;
+  }
+
+  bool IsClosing() const {
+    return queue_.IsClosing();
+  }
+
+  // Returns the queue's slot storage (Capacity() * sizeof(T)) plus the tracked item sizes.
+  size_t GetSize() const {
+    const size_t slot_bytes = queue_.Capacity() * sizeof(T);
+    return slot_bytes + size_.load(std::memory_order_relaxed);
+  }
+
+ private:
+  SimpleChannel<T, Queue> queue_;
+  std::atomic<size_t> size_ = 0;
+};
+
+}  // namespace dfly
--- a/src/facade/dragonfly_connection.h
+++ b/src/facade/dragonfly_connection.h
@ -187,6 +187,10 @@ class Connection : public util::Connection {
    return name_;
  }

+  base::IoBuf::MemoryUsage GetMemoryUsage() const {  // Memory usage of this connection's io_buf_.
+    return io_buf_.GetMemoryUsage();
+  }
+
  ConnectionContext* cntx();

  // Requests that at some point, this connection will be migrated to `dest` thread.
--- a/src/server/memory_cmd.cc
+++ b/src/server/memory_cmd.cc
@ -7,9 +7,13 @@
 #include <absl/strings/str_cat.h>
 #include <mimalloc.h>

+#include "base/io_buf.h"
+#include "facade/dragonfly_connection.h"
 #include "facade/error.h"
 #include "server/engine_shard_set.h"
+#include "server/server_family.h"
 #include "server/server_state.h"
+#include "server/snapshot.h"

 using namespace std;
 using namespace facade;
@ -75,7 +79,7 @@ size_t MemoryUsage(PrimeIterator it) {

 }  // namespace

-MemoryCmd::MemoryCmd(ServerFamily* owner, ConnectionContext* cntx) : cntx_(cntx) {
+MemoryCmd::MemoryCmd(ServerFamily* owner, ConnectionContext* cntx) : cntx_(cntx), owner_(owner) {
 }  // owner_ is kept because Stats() queries the ServerFamily.

 void MemoryCmd::Run(CmdArgList args) {
@ -84,6 +88,8 @@ void MemoryCmd::Run(CmdArgList args) {
  if (sub_cmd == "HELP") {
    string_view help_arr[] = {
        "MEMORY <subcommand> [<arg> ...]. Subcommands are:",
+        "STATS",
+        "    Shows breakdown of memory.",
        "MALLOC-STATS [BACKING] [thread-id]",
        "    Show malloc stats for a heap residing in specified thread-id. 0 by default.",
        "    If BACKING is specified, show stats for the backing heap.",
@ -95,6 +101,10 @@ void MemoryCmd::Run(CmdArgList args) {
    return (*cntx_)->SendSimpleStrArr(help_arr);
  };

+  if (sub_cmd == "STATS") {
+    return Stats();
+  }
+
  if (sub_cmd == "USAGE" && args.size() > 1) {
    string_view key = ArgS(args, 1);
    return Usage(key);
@ -143,6 +153,100 @@ void MemoryCmd::Run(CmdArgList args) {
  return (*cntx_)->SendError(err, kSyntaxErrType);
 }

+namespace {
+
+struct ConnectionMemoryUsage {  // Filled by GetConnectionMemoryUsage() over all connections.
+  size_t connection_count = 0;  // Connections without a replication flow.
+  size_t pipelined_bytes = 0;   // Bytes held by conn_state.exec_info.body, over all connections.
+  base::IoBuf::MemoryUsage connections_memory;  // Summed io buffer usage of those connections.
+
+  size_t replication_connection_count = 0;      // Connections that have a replication flow.
+  base::IoBuf::MemoryUsage replication_memory;  // Summed io buffer usage of those connections.
+};
+
+// Walks every connection of every listener of `server` and sums up its memory usage.
+// Connections that have a replication flow are accounted separately from all other connections.
+// The accumulator is updated under a mutex, as the traversal callback receives a thread index
+// and is presumably invoked from more than one thread.
+ConnectionMemoryUsage GetConnectionMemoryUsage(ServerFamily* server) {
+  Mutex mu;
+  ConnectionMemoryUsage mem ABSL_GUARDED_BY(mu);
+
+  for (auto* listener : server->GetListeners()) {
+    listener->TraverseConnections([&](unsigned thread_index, util::Connection* conn) {
+      auto* dfly_conn = static_cast<facade::Connection*>(conn);
+      auto* cntx = static_cast<ConnectionContext*>(dfly_conn->cntx());
+      lock_guard lock(mu);
+
+      // A connection may have no context: check for that *before* dereferencing cntx, and count
+      // such a connection as a regular (non-replication) one.
+      if (cntx == nullptr || cntx->replication_flow == nullptr) {
+        mem.connection_count++;
+        mem.connections_memory += dfly_conn->GetMemoryUsage();
+      } else {
+        mem.replication_connection_count++;
+        mem.replication_memory += dfly_conn->GetMemoryUsage();
+      }
+
+      if (cntx != nullptr) {
+        // Storage of the stored-commands container itself, plus the heap memory of each command.
+        mem.pipelined_bytes += cntx->conn_state.exec_info.body.capacity() * sizeof(StoredCmd);
+        for (const auto& pipeline : cntx->conn_state.exec_info.body) {
+          mem.pipelined_bytes += pipeline.UsedHeapMemory();
+        }
+      }
+    });
+  }
+
+  return mem;
+}
+
+// Appends the four "<prefix>.*" entries describing `mem` to `stats`. `total` is reported as
+// "<prefix>.total_bytes"; the remaining values are taken from the fields of `mem`.
+void PushMemoryUsageStats(const base::IoBuf::MemoryUsage& mem, string_view prefix, size_t total,
+                          vector<pair<string, size_t>>* stats) {
+  const auto append = [prefix, stats](string_view suffix, size_t value) {
+    stats->emplace_back(absl::StrCat(prefix, suffix), value);
+  };
+
+  append(".total_bytes", total);
+  append(".consumed_bytes", mem.consumed);
+  append(".pending_input_bytes", mem.input_length);
+  append(".pending_output_bytes", mem.append_length);
+}
+
+}  // namespace
+
+// Handles MEMORY STATS: gathers a flat list of (name, value) entries and replies with it as a
+// map.
+void MemoryCmd::Stats() {
+  vector<pair<string, size_t>> stats;
+  stats.reserve(25);
+  // NOTE(review): server_metrics is not read anywhere below - confirm whether this call is needed.
+  auto server_metrics = owner_->GetMetrics();
+
+  // RSS, current and peak.
+  stats.emplace_back("rss_bytes", rss_mem_current.load(memory_order_relaxed));
+  stats.emplace_back("rss_peak_bytes", rss_mem_peak.load(memory_order_relaxed));
+
+  // Memory used by DbShards and DashTable, current and peak.
+  stats.emplace_back("data_bytes", used_mem_current.load(memory_order_relaxed));
+  stats.emplace_back("data_peak_bytes", used_mem_peak.load(memory_order_relaxed));
+
+  ConnectionMemoryUsage conn_usage = GetConnectionMemoryUsage(owner_);
+
+  // Non-replication connections. Their reported total also covers the pipeline bytes.
+  const size_t connections_total =
+      conn_usage.connections_memory.GetTotalSize() + conn_usage.pipelined_bytes;
+  stats.emplace_back("connections.count", conn_usage.connection_count);
+  PushMemoryUsageStats(conn_usage.connections_memory, "connections", connections_total, &stats);
+  stats.emplace_back("connections.pipeline_bytes", conn_usage.pipelined_bytes);
+
+  // Replication connections.
+  stats.emplace_back("replication.connections_count", conn_usage.replication_connection_count);
+  PushMemoryUsageStats(conn_usage.replication_memory, "replication",
+                       conn_usage.replication_memory.GetTotalSize(), &stats);
+
+  // Serialization memory (replication-related serialization as well as saving to RDB files),
+  // summed over the per-thread values reported by every thread of the shard set's pool.
+  atomic<size_t> serialization_bytes = 0;
+  shard_set->pool()->AwaitFiberOnAll([&serialization_bytes](auto*) {
+    serialization_bytes.fetch_add(SliceSnapshot::GetThreadLocalMemoryUsage());
+  });
+  stats.emplace_back("serialization", serialization_bytes.load());
+
+  (*cntx_)->StartCollection(stats.size(), RedisReplyBuilder::MAP);
+  for (const auto& [name, value] : stats) {
+    (*cntx_)->SendBulkString(name);
+    (*cntx_)->SendLong(value);
+  }
+}
+
 void MemoryCmd::Usage(std::string_view key) {
  ShardId sid = Shard(key, shard_set->size());
  ssize_t memory_usage = shard_set->pool()->at(sid)->AwaitBrief([key, this]() -> ssize_t {
--- a/src/server/memory_cmd.h
+++ b/src/server/memory_cmd.h
@ -17,9 +17,11 @@ class MemoryCmd {
  void Run(CmdArgList args);

 private:
+  void Stats();
  void Usage(std::string_view key);

  ConnectionContext* cntx_;
+  ServerFamily* owner_;
 };

 }  // namespace dfly
--- a/src/server/rdb_save.cc
+++ b/src/server/rdb_save.cc
@ -950,10 +950,6 @@ class RdbSaver::Impl {
  // Multi entry compression is available only on df snapshot, this will
  // make snapshot size smaller and operation faster.
  CompressionMode compression_mode_;
-
-  struct Stats {
-    std::atomic<size_t> pulled_bytes{0};
-  } stats_;
 };

 // We pass K=sz to say how many producers are pushing data in order to maintain
@ -1043,8 +1039,6 @@ error_code RdbSaver::Impl::ConsumeChannel(const Cancellation* cll) {
        continue;

      DVLOG(2) << "Pulled " << record->id;
-      stats_.pulled_bytes.fetch_add(record->value.size(), memory_order_relaxed);
-
      io_error = sink_->Write(io::Buffer(record->value));
      if (io_error) {
        break;
@ -1052,17 +1046,12 @@ error_code RdbSaver::Impl::ConsumeChannel(const Cancellation* cll) {
    } while ((record = records_popper.TryPop()));
  }  // while (records_popper.Pop())

-  size_t pushed_bytes = 0;
  for (auto& ptr : shard_snapshots_) {
    ptr->Join();
-    pushed_bytes += ptr->pushed_bytes();
  }

  DCHECK(!record.has_value() || !channel_.TryPop(*record));

-  VLOG(1) << "Channel pulled bytes: " << stats_.pulled_bytes.load(memory_order_relaxed)
-          << " pushed bytes: " << pushed_bytes;
-
  return io_error;
 }

@ -1105,13 +1094,12 @@ void RdbSaver::Impl::Cancel() {
 // This function is called from connection thread when info command is invoked.
 // All accessed variables must be thread safe, as they are not fetched from the rdb saver thread.
 size_t RdbSaver::Impl::GetTotalBuffersSize() const {
-  std::atomic<size_t> pushed_bytes{0};
+  std::atomic<size_t> channel_bytes{0};
  std::atomic<size_t> serializer_bytes{0};
-  size_t pulled_bytes = stats_.pulled_bytes.load(memory_order_relaxed);

-  auto cb = [this, &pushed_bytes, &serializer_bytes](ShardId sid) {
+  // Accumulates the channel and serializer buffer sizes of the snapshot belonging to shard `sid`.
+  // NOTE(review): if all shard snapshots write into one shared channel, channel_bytes counts that
+  // channel once per shard - confirm.
+  auto cb = [this, &channel_bytes, &serializer_bytes](ShardId sid) {
    auto& snapshot = shard_snapshots_[sid];
-    pushed_bytes.fetch_add(snapshot->pushed_bytes(), memory_order_relaxed);
+    channel_bytes.fetch_add(snapshot->GetTotalChannelCapacity(), memory_order_relaxed);
-    serializer_bytes.store(snapshot->GetTotalBufferCapacity(), memory_order_relaxed);
+    // Add rather than store: cb runs once per shard, and store() would keep only the value of
+    // whichever shard happened to write last.
+    serializer_bytes.fetch_add(snapshot->GetTotalBufferCapacity(), memory_order_relaxed);
  };

@ -1119,17 +1107,11 @@ size_t RdbSaver::Impl::GetTotalBuffersSize() const {
    cb(0);
  } else {
    shard_set->RunBriefInParallel([&](EngineShard* es) { cb(es->shard_id()); });
-    // Note that pushed bytes and pulled bytes values are fetched at different times, as we need to
-    // calc the pushed bytes using RunBriefInParallel.
-    // pulled bytes might be higher untill we return here from RunBriefInParallel.
  }
-  size_t total_bytes = pushed_bytes.load(memory_order_relaxed) +
-                       serializer_bytes.load(memory_order_relaxed) - pulled_bytes;
-  VLOG(2) << "pushed_bytes:" << pushed_bytes.load(memory_order_relaxed)
-          << " serializer_bytes: " << serializer_bytes.load(memory_order_relaxed)
-          << " pulled_bytes: " << pulled_bytes << " total_bytes:" << total_bytes;

-  return total_bytes;
+  VLOG(2) << "channel_bytes:" << channel_bytes.load(memory_order_relaxed)
+          << " serializer_bytes: " << serializer_bytes.load(memory_order_relaxed);
+  return channel_bytes.load(memory_order_relaxed) + serializer_bytes.load(memory_order_relaxed);
 }

 RdbSaver::GlobalData RdbSaver::GetGlobalData(const Service* service) {
--- a/src/server/server_family.h
+++ b/src/server/server_family.h
@ -182,6 +182,10 @@ class ServerFamily {
    return dfly_cmd_.get();
  }

+  const std::vector<facade::Listener*>& GetListeners() const {  // Read-only view of listeners_.
+    return listeners_;
+  }
+
  bool HasReplica() const;
  std::optional<Replica::Info> GetReplicaInfo() const;
  std::string GetReplicaMasterId() const;
--- a/src/server/snapshot.cc
+++ b/src/server/snapshot.cc
@ -25,12 +25,26 @@ using namespace std;
 using namespace util;
 using namespace chrono_literals;

+namespace {
+thread_local absl::flat_hash_set<SliceSnapshot*> tl_slice_snapshots;  // Added in ctor, erased in dtor.
+}  // namespace
+
 SliceSnapshot::SliceSnapshot(DbSlice* slice, RecordChannel* dest, CompressionMode compression_mode)
    : db_slice_(slice), dest_(dest), compression_mode_(compression_mode) {
  db_array_ = slice->databases();
+  tl_slice_snapshots.insert(this);  // Makes it visible to GetThreadLocalMemoryUsage().
 }

 SliceSnapshot::~SliceSnapshot() {
+  tl_slice_snapshots.erase(this);  // Undoes the registration done in the constructor.
+}
+
+// Sums the serializer buffer capacity and the channel size of every snapshot registered in the
+// calling thread's tl_slice_snapshots set.
+// NOTE(review): if several snapshots push into the same channel, that channel's size is added
+// once per snapshot - confirm whether that is intended.
+size_t SliceSnapshot::GetThreadLocalMemoryUsage() {
+  size_t total_bytes = 0;
+  for (const SliceSnapshot* snap : tl_slice_snapshots) {
+    total_bytes += snap->GetTotalBufferCapacity();
+    total_bytes += snap->GetTotalChannelCapacity();
+  }
+  return total_bytes;
 }

 void SliceSnapshot::Start(bool stream_journal, const Cancellation* cll) {
@ -274,8 +288,6 @@ bool SliceSnapshot::PushSerializedToChannel(bool force) {
  if (serialized == 0)
    return 0;

-  stats_.pushed_bytes += serialized;
-
  auto id = rec_id_++;
  DVLOG(2) << "Pushed " << id;
  DbRecord db_rec{.id = id, .value = std::move(sfile.val)};
@ -335,4 +347,8 @@ size_t SliceSnapshot::GetTotalBufferCapacity() const {
  return serializer_->GetTotalBufferCapacity();
 }

+size_t SliceSnapshot::GetTotalChannelCapacity() const {  // Size reported by the dest_ channel.
+  return dest_->GetSize();
+}
+
 }  // namespace dfly
--- a/src/server/snapshot.h
+++ b/src/server/snapshot.h
@ -8,6 +8,7 @@
 #include <bitset>

 #include "base/pod_array.h"
+#include "core/size_tracking_channel.h"
 #include "io/file.h"
 #include "server/db_slice.h"
 #include "server/rdb_save.h"
@ -50,13 +51,20 @@ class SliceSnapshot {
  struct DbRecord {
    uint64_t id;
    std::string value;
+
+    // Heap bytes owned by this record, as required by SizeTrackingChannel. A capacity of up to
+    // kSmallStringOptSize is reported as 0, on the assumption that such a string is stored inline.
+    // NOTE(review): an inline capacity of 15 depends on the standard library implementation -
+    // confirm for the targeted one.
+    size_t size() const {
+      constexpr size_t kSmallStringOptSize = 15;
+      const size_t capacity = value.capacity();
+      if (capacity > kSmallStringOptSize)
+        return capacity;
+      return 0UL;
+    }
  };

-  using RecordChannel = SimpleChannel<DbRecord, base::mpmc_bounded_queue<DbRecord>>;
+  using RecordChannel = SizeTrackingChannel<DbRecord, base::mpmc_bounded_queue<DbRecord>>;

  SliceSnapshot(DbSlice* slice, RecordChannel* dest, CompressionMode compression_mode);
  ~SliceSnapshot();

+  static size_t GetThreadLocalMemoryUsage();
+
  // Initialize snapshot, start bucket iteration fiber, register listeners.
  // In journal streaming mode it needs to be stopped by either Stop or Cancel.
  void Start(bool stream_journal, const Cancellation* cll);
@ -114,15 +122,12 @@ class SliceSnapshot {
    return snapshot_version_;
  }

-  size_t pushed_bytes() const {
-    return stats_.pushed_bytes;
-  }
-
  const RdbTypeFreqMap& freq_map() const {
    return type_freq_map_;
  }

-  size_t GetTotalBufferCapacity() const;
+  size_t GetTotalBufferCapacity() const;   // In bytes
+  size_t GetTotalChannelCapacity() const;  // In bytes

 private:
  DbSlice* db_slice_;
@ -148,7 +153,6 @@ class SliceSnapshot {
  uint64_t rec_id_ = 0;

  struct Stats {
-    size_t pushed_bytes = 0;
    size_t loop_serialized = 0, skipped = 0, side_saved = 0;
    size_t savecb_calls = 0;
  } stats_;