feat: Memory stats (#2162)

2024-12-14 11:58:02 +00:00 · 2023-11-13 13:58:29 +02:00 · 2023-11-13 13:58:29 +02:00 · 5ca2be1185
commit 5ca2be1185
parent 4b685aa809
9 changed files with 226 additions and 35 deletions
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 7d206c88fcbed68ce899971cba802ba17b49ceae
+Subproject commit 88514b9270edc194ad9ea018e614e2d1b17b3962
--- a/src/core/size_tracking_channel.h
+++ b/src/core/size_tracking_channel.h
@ -0,0 +1,75 @@
 // Copyright 2023, DragonflyDB authors.  All rights reserved.
 // See LICENSE for licensing terms.
 //
 #pragma once
 #include <atomic>
 #include "core/fibers.h"
 namespace dfly {
 // SimpleQueue-like interface, but also keeps track over the size of Ts it owns.
 // It has a slightly less efficient TryPush() API as it forces construction of Ts even if they are
 // not pushed.
 // T must have a .size() method, which should return the heap-allocated size of T, excluding
 // anything included in sizeof(T). We could generalize this in the future.
 template <typename T, typename Queue = folly::ProducerConsumerQueue<T>> class SizeTrackingChannel {
 public:
  SizeTrackingChannel(size_t n, unsigned num_producers = 1) : queue_(n, num_producers) {
  }
  // Here and below, we must accept a T instead of building it from variadic args, as we need to
  // know its size in case it is added.
  void Push(T t) noexcept {
    size_.fetch_add(t.size(), std::memory_order_relaxed);
    queue_.Push(std::move(t));
  }
  bool TryPush(T t) noexcept {
    const size_t size = t.size();
    if (queue_.TryPush(std::move(t))) {
      size_.fetch_add(size, std::memory_order_relaxed);
      return true;
    }
    return false;
  }
  bool Pop(T& dest) {
    if (queue_.Pop(dest)) {
      size_.fetch_sub(dest.size(), std::memory_order_relaxed);
      return true;
    }
    return false;
  }
  void StartClosing() {
    queue_.StartClosing();
  }
  bool TryPop(T& dest) {
    if (queue_.TryPop(dest)) {
      size_.fetch_sub(dest.size(), std::memory_order_relaxed);
      return true;
    }
    return false;
  }
  bool IsClosing() const {
    return queue_.IsClosing();
  }
  size_t GetSize() const {
    return queue_.Capacity() * sizeof(T) + size_.load(std::memory_order_relaxed);
  }
 private:
  SimpleChannel<T, Queue> queue_;
  std::atomic<size_t> size_ = 0;
 };
 }  // namespace dfly
--- a/src/facade/dragonfly_connection.h
+++ b/src/facade/dragonfly_connection.h
@ -187,6 +187,10 @@ class Connection : public util::Connection {
    return name_;
  }
  base::IoBuf::MemoryUsage GetMemoryUsage() const {
    return io_buf_.GetMemoryUsage();
  }
  ConnectionContext* cntx();
  // Requests that at some point, this connection will be migrated to `dest` thread.
--- a/src/server/memory_cmd.cc
+++ b/src/server/memory_cmd.cc
@ -7,9 +7,13 @@
 #include <absl/strings/str_cat.h>
 #include <mimalloc.h>
 #include "base/io_buf.h"
 #include "facade/dragonfly_connection.h"
 #include "facade/error.h"
 #include "server/engine_shard_set.h"
 #include "server/server_family.h"
 #include "server/server_state.h"
 #include "server/snapshot.h"
 using namespace std;
 using namespace facade;
@ -75,7 +79,7 @@ size_t MemoryUsage(PrimeIterator it) {
 }  // namespace
-MemoryCmd::MemoryCmd(ServerFamily* owner, ConnectionContext* cntx) : cntx_(cntx) {
+MemoryCmd::MemoryCmd(ServerFamily* owner, ConnectionContext* cntx) : cntx_(cntx), owner_(owner) {
 }
 void MemoryCmd::Run(CmdArgList args) {
@ -84,6 +88,8 @@ void MemoryCmd::Run(CmdArgList args) {
  if (sub_cmd == "HELP") {
    string_view help_arr[] = {
        "MEMORY <subcommand> [<arg> ...]. Subcommands are:",
        "STATS",
        "    Shows breakdown of memory.",
        "MALLOC-STATS [BACKING] [thread-id]",
        "    Show malloc stats for a heap residing in specified thread-id. 0 by default.",
        "    If BACKING is specified, show stats for the backing heap.",
@ -95,6 +101,10 @@ void MemoryCmd::Run(CmdArgList args) {
    return (*cntx_)->SendSimpleStrArr(help_arr);
  };
  if (sub_cmd == "STATS") {
    return Stats();
  }
  if (sub_cmd == "USAGE" && args.size() > 1) {
    string_view key = ArgS(args, 1);
    return Usage(key);
@ -143,6 +153,100 @@ void MemoryCmd::Run(CmdArgList args) {
  return (*cntx_)->SendError(err, kSyntaxErrType);
 }
 namespace {
 struct ConnectionMemoryUsage {
  size_t connection_count = 0;
  size_t pipelined_bytes = 0;
  base::IoBuf::MemoryUsage connections_memory;
  size_t replication_connection_count = 0;
  base::IoBuf::MemoryUsage replication_memory;
 };
 ConnectionMemoryUsage GetConnectionMemoryUsage(ServerFamily* server) {
  Mutex mu;
  ConnectionMemoryUsage mem ABSL_GUARDED_BY(mu);
  for (auto* listener : server->GetListeners()) {
    listener->TraverseConnections([&](unsigned thread_index, util::Connection* conn) {
      auto* dfly_conn = static_cast<facade::Connection*>(conn);
      auto* cntx = static_cast<ConnectionContext*>(dfly_conn->cntx());
      lock_guard lock(mu);
      if (cntx->replication_flow == nullptr) {
        mem.connection_count++;
        mem.connections_memory += dfly_conn->GetMemoryUsage();
      } else {
        mem.replication_connection_count++;
        mem.replication_memory += dfly_conn->GetMemoryUsage();
      }
      if (cntx != nullptr) {
        mem.pipelined_bytes += cntx->conn_state.exec_info.body.capacity() * sizeof(StoredCmd);
        for (const auto& pipeline : cntx->conn_state.exec_info.body) {
          mem.pipelined_bytes += pipeline.UsedHeapMemory();
        }
      }
    });
  }
  return mem;
 }
 void PushMemoryUsageStats(const base::IoBuf::MemoryUsage& mem, string_view prefix, size_t total,
                          vector<pair<string, size_t>>* stats) {
  stats->push_back({absl::StrCat(prefix, ".total_bytes"), total});
  stats->push_back({absl::StrCat(prefix, ".consumed_bytes"), mem.consumed});
  stats->push_back({absl::StrCat(prefix, ".pending_input_bytes"), mem.input_length});
  stats->push_back({absl::StrCat(prefix, ".pending_output_bytes"), mem.append_length});
 }
 }  // namespace
 void MemoryCmd::Stats() {
  vector<pair<string, size_t>> stats;
  stats.reserve(25);
  auto server_metrics = owner_->GetMetrics();
  // RSS
  stats.push_back({"rss_bytes", rss_mem_current.load(memory_order_relaxed)});
  stats.push_back({"rss_peak_bytes", rss_mem_peak.load(memory_order_relaxed)});
  // Used by DbShards and DashTable
  stats.push_back({"data_bytes", used_mem_current.load(memory_order_relaxed)});
  stats.push_back({"data_peak_bytes", used_mem_peak.load(memory_order_relaxed)});
  ConnectionMemoryUsage connection_memory = GetConnectionMemoryUsage(owner_);
  // Connection stats, excluding replication connections
  stats.push_back({"connections.count", connection_memory.connection_count});
  PushMemoryUsageStats(
      connection_memory.connections_memory, "connections",
      connection_memory.connections_memory.GetTotalSize() + connection_memory.pipelined_bytes,
      &stats);
  stats.push_back({"connections.pipeline_bytes", connection_memory.pipelined_bytes});
  // Replication connection stats
  stats.push_back(
      {"replication.connections_count", connection_memory.replication_connection_count});
  PushMemoryUsageStats(connection_memory.replication_memory, "replication",
                       connection_memory.replication_memory.GetTotalSize(), &stats);
  atomic<size_t> serialization_memory = 0;
  shard_set->pool()->AwaitFiberOnAll(
      [&](auto*) { serialization_memory.fetch_add(SliceSnapshot::GetThreadLocalMemoryUsage()); });
  // Serialization stats, including both replication-related serialization and saving to RDB files.
  stats.push_back({"serialization", serialization_memory.load()});
  (*cntx_)->StartCollection(stats.size(), RedisReplyBuilder::MAP);
  for (const auto& [k, v] : stats) {
    (*cntx_)->SendBulkString(k);
    (*cntx_)->SendLong(v);
  }
 }
 void MemoryCmd::Usage(std::string_view key) {
  ShardId sid = Shard(key, shard_set->size());
  ssize_t memory_usage = shard_set->pool()->at(sid)->AwaitBrief([key, this]() -> ssize_t {
--- a/src/server/memory_cmd.h
+++ b/src/server/memory_cmd.h
@ -17,9 +17,11 @@ class MemoryCmd {
  void Run(CmdArgList args);
 private:
  void Stats();
  void Usage(std::string_view key);
  ConnectionContext* cntx_;
  ServerFamily* owner_;
 };
 }  // namespace dfly
--- a/src/server/rdb_save.cc
+++ b/src/server/rdb_save.cc
@ -950,10 +950,6 @@ class RdbSaver::Impl {
  // Multi entry compression is available only on df snapshot, this will
  // make snapshot size smaller and opreation faster.
  CompressionMode compression_mode_;
  struct Stats {
    std::atomic<size_t> pulled_bytes{0};
  } stats_;
 };
 // We pass K=sz to say how many producers are pushing data in order to maintain
@ -1043,8 +1039,6 @@ error_code RdbSaver::Impl::ConsumeChannel(const Cancellation* cll) {
        continue;
      DVLOG(2) << "Pulled " << record->id;
      stats_.pulled_bytes.fetch_add(record->value.size(), memory_order_relaxed);
      io_error = sink_->Write(io::Buffer(record->value));
      if (io_error) {
        break;
@ -1052,17 +1046,12 @@ error_code RdbSaver::Impl::ConsumeChannel(const Cancellation* cll) {
    } while ((record = records_popper.TryPop()));
  }  // while (records_popper.Pop())
  size_t pushed_bytes = 0;
  for (auto& ptr : shard_snapshots_) {
    ptr->Join();
    pushed_bytes += ptr->pushed_bytes();
  }
  DCHECK(!record.has_value() || !channel_.TryPop(*record));
  VLOG(1) << "Channel pulled bytes: " << stats_.pulled_bytes.load(memory_order_relaxed)
          << " pushed bytes: " << pushed_bytes;
  return io_error;
 }
@ -1105,13 +1094,12 @@ void RdbSaver::Impl::Cancel() {
 // This function is called from connection thread when info command is invoked.
 // All accessed variableds must be thread safe, as they are fetched not from the rdb saver thread.
 size_t RdbSaver::Impl::GetTotalBuffersSize() const {
-  std::atomic<size_t> pushed_bytes{0};
+  std::atomic<size_t> channel_bytes{0};
  std::atomic<size_t> serializer_bytes{0};
  size_t pulled_bytes = stats_.pulled_bytes.load(memory_order_relaxed);
-  auto cb = [this, &pushed_bytes, &serializer_bytes](ShardId sid) {
+  auto cb = [this, &channel_bytes, &serializer_bytes](ShardId sid) {
    auto& snapshot = shard_snapshots_[sid];
-    pushed_bytes.fetch_add(snapshot->pushed_bytes(), memory_order_relaxed);
+    channel_bytes.fetch_add(snapshot->GetTotalChannelCapacity(), memory_order_relaxed);
    serializer_bytes.store(snapshot->GetTotalBufferCapacity(), memory_order_relaxed);
  };
@ -1119,17 +1107,11 @@ size_t RdbSaver::Impl::GetTotalBuffersSize() const {
    cb(0);
  } else {
    shard_set->RunBriefInParallel([&](EngineShard* es) { cb(es->shard_id()); });
    // Note that pushed bytes and pulled bytes values are fetched at different times, as we need to
    // calc the pushed bytes using RunBriefInParallel.
    // pulled bytes might be higher untill we return here from RunBriefInParallel.
  }
  size_t total_bytes = pushed_bytes.load(memory_order_relaxed) +
                       serializer_bytes.load(memory_order_relaxed) - pulled_bytes;
  VLOG(2) << "pushed_bytes:" << pushed_bytes.load(memory_order_relaxed)
          << " serializer_bytes: " << serializer_bytes.load(memory_order_relaxed)
          << " pulled_bytes: " << pulled_bytes << " total_bytes:" << total_bytes;
-  return total_bytes;
+  VLOG(2) << "channel_bytes:" << channel_bytes.load(memory_order_relaxed)
          << " serializer_bytes: " << serializer_bytes.load(memory_order_relaxed);
  return channel_bytes.load(memory_order_relaxed) + serializer_bytes.load(memory_order_relaxed);
 }
 RdbSaver::GlobalData RdbSaver::GetGlobalData(const Service* service) {
--- a/src/server/server_family.h
+++ b/src/server/server_family.h
@ -182,6 +182,10 @@ class ServerFamily {
    return dfly_cmd_.get();
  }
  const std::vector<facade::Listener*>& GetListeners() const {
    return listeners_;
  }
  bool HasReplica() const;
  std::optional<Replica::Info> GetReplicaInfo() const;
  std::string GetReplicaMasterId() const;
--- a/src/server/snapshot.cc
+++ b/src/server/snapshot.cc
@ -25,12 +25,26 @@ using namespace std;
 using namespace util;
 using namespace chrono_literals;
 namespace {
 thread_local absl::flat_hash_set<SliceSnapshot*> tl_slice_snapshots;
 }  // namespace
 SliceSnapshot::SliceSnapshot(DbSlice* slice, RecordChannel* dest, CompressionMode compression_mode)
    : db_slice_(slice), dest_(dest), compression_mode_(compression_mode) {
  db_array_ = slice->databases();
  tl_slice_snapshots.insert(this);
 }
 SliceSnapshot::~SliceSnapshot() {
  tl_slice_snapshots.erase(this);
 }
 size_t SliceSnapshot::GetThreadLocalMemoryUsage() {
  size_t mem = 0;
  for (SliceSnapshot* snapshot : tl_slice_snapshots) {
    mem += snapshot->GetTotalBufferCapacity() + snapshot->GetTotalChannelCapacity();
  }
  return mem;
 }
 void SliceSnapshot::Start(bool stream_journal, const Cancellation* cll) {
@ -274,8 +288,6 @@ bool SliceSnapshot::PushSerializedToChannel(bool force) {
  if (serialized == 0)
    return 0;
  stats_.pushed_bytes += serialized;
  auto id = rec_id_++;
  DVLOG(2) << "Pushed " << id;
  DbRecord db_rec{.id = id, .value = std::move(sfile.val)};
@ -335,4 +347,8 @@ size_t SliceSnapshot::GetTotalBufferCapacity() const {
  return serializer_->GetTotalBufferCapacity();
 }
 size_t SliceSnapshot::GetTotalChannelCapacity() const {
  return dest_->GetSize();
 }
 }  // namespace dfly
--- a/src/server/snapshot.h
+++ b/src/server/snapshot.h
@ -8,6 +8,7 @@
 #include <bitset>
 #include "base/pod_array.h"
 #include "core/size_tracking_channel.h"
 #include "io/file.h"
 #include "server/db_slice.h"
 #include "server/rdb_save.h"
@ -50,13 +51,20 @@ class SliceSnapshot {
  struct DbRecord {
    uint64_t id;
    std::string value;
    size_t size() const {
      constexpr size_t kSmallStringOptSize = 15;
      return value.capacity() > kSmallStringOptSize ? value.capacity() : 0UL;
    }
  };
-  using RecordChannel = SimpleChannel<DbRecord, base::mpmc_bounded_queue<DbRecord>>;
+  using RecordChannel = SizeTrackingChannel<DbRecord, base::mpmc_bounded_queue<DbRecord>>;
  SliceSnapshot(DbSlice* slice, RecordChannel* dest, CompressionMode compression_mode);
  ~SliceSnapshot();
  static size_t GetThreadLocalMemoryUsage();
  // Initialize snapshot, start bucket iteration fiber, register listeners.
  // In journal streaming mode it needs to be stopped by either Stop or Cancel.
  void Start(bool stream_journal, const Cancellation* cll);
@ -114,15 +122,12 @@ class SliceSnapshot {
    return snapshot_version_;
  }
  size_t pushed_bytes() const {
    return stats_.pushed_bytes;
  }
  const RdbTypeFreqMap& freq_map() const {
    return type_freq_map_;
  }
-  size_t GetTotalBufferCapacity() const;
+  size_t GetTotalBufferCapacity() const;   // In bytes
  size_t GetTotalChannelCapacity() const;  // In bytes
 private:
  DbSlice* db_slice_;
@ -148,7 +153,6 @@ class SliceSnapshot {
  uint64_t rec_id_ = 0;
  struct Stats {
    size_t pushed_bytes = 0;
    size_t loop_serialized = 0, skipped = 0, side_saved = 0;
    size_t savecb_calls = 0;
  } stats_;
		`@ -1 +1 @@`
			`Subproject commit 7d206c88fcbed68ce899971cba802ba17b49ceae`				`Subproject commit 88514b9270edc194ad9ea018e614e2d1b17b3962`