
chore: remove batch reply statistics (#2490)

1. The average batch latency will always be reported as 0: even when there are outliers, they are
   dominated by the small CPU-only copies that take only dozens of nanoseconds.
2. Measuring an operation like kBatch, which is purely CPU-bound, required a clock call.
   According to the CPU profiler, this accounted for approximately 5% of CPU usage
   (see the illustrative sketch below).
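
To illustrate (this sketch is not part of the commit and is not Dragonfly code): a standalone
micro-benchmark along the following lines demonstrates both effects. The payload size, iteration
count, and the use of absl::GetCurrentTimeNanos are assumptions made for the example; absolute
numbers depend on the host.

// Hypothetical micro-benchmark: compares a bare in-memory append (the kBatch
// path) against the same append bracketed by two clock reads, the pattern
// this commit removes.
#include <absl/time/clock.h>

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  constexpr int kIters = 1'000'000;
  const char payload[64] = {};
  std::vector<char> batch;
  batch.reserve(sizeof(payload) * kIters);  // pre-size so the loop measures only copies

  // Bare copies: what a batched Send() actually does.
  int64_t start = absl::GetCurrentTimeNanos();
  for (int i = 0; i < kIters; ++i)
    batch.insert(batch.end(), payload, payload + sizeof(payload));
  int64_t bare_ns = absl::GetCurrentTimeNanos() - start;

  // The same copies, each bracketed by clock reads feeding a usec counter,
  // mirroring the cleanup lambda this commit removes.
  batch.clear();
  int64_t total_duration_usec = 0;
  start = absl::GetCurrentTimeNanos();
  for (int i = 0; i < kIters; ++i) {
    int64_t before = absl::GetCurrentTimeNanos();
    batch.insert(batch.end(), payload, payload + sizeof(payload));
    total_duration_usec += (absl::GetCurrentTimeNanos() - before) / 1'000;
  }
  int64_t timed_ns = absl::GetCurrentTimeNanos() - start;

  std::cout << "bare append:  " << bare_ns / kIters << " ns/op\n"
            << "timed append: " << timed_ns / kIters << " ns/op\n"
            << "accumulated batch latency: " << total_duration_usec << " usec\n";
}

Each append takes well under a microsecond, so the per-operation division by 1'000 truncates to
zero and the accumulated latency stays 0 (point 1), while the two extra clock reads add a
measurable per-operation cost (point 2).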

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
Roman Gershman 2024-01-29 03:23:38 +02:00 committed by GitHub
parent 754a186f5a
commit d7604c1bd0
4 changed files with 18 additions and 62 deletions

@@ -41,7 +41,7 @@ ConnectionStats& ConnectionStats::operator+=(const ConnectionStats& o) {
 }
 
 ReplyStats& ReplyStats::operator+=(const ReplyStats& o) {
-  static_assert(sizeof(ReplyStats) == 80u);
+  static_assert(sizeof(ReplyStats) == 64u);
 
   ADD(io_write_cnt);
   ADD(io_write_bytes);
@@ -49,9 +49,7 @@ ReplyStats& ReplyStats::operator+=(const ReplyStats& o) {
     err_count[k_v.first] += k_v.second;
   }
 
-  for (unsigned i = 0; i < kNumTypes; ++i) {
-    send_stats[i] += o.send_stats[i];
-  }
+  send_stats += o.send_stats;
 
   return *this;
 }

@@ -60,12 +60,6 @@ struct ConnectionStats {
 };
 
 struct ReplyStats {
-  enum SendStatsType {
-    kRegular,   // Send() operations that are written to sockets
-    kBatch,     // Send() operations that are internally batched to a buffer
-    kNumTypes,  // Number of types, do not use directly
-  };
-
   struct SendStats {
     int64_t count = 0;
     int64_t total_duration = 0;
@@ -79,7 +73,8 @@ struct ReplyStats {
     }
   };
 
-  SendStats send_stats[SendStatsType::kNumTypes];
+  // Send() operations that are written to sockets
+  SendStats send_stats;
 
   size_t io_write_cnt = 0;
   size_t io_write_bytes = 0;

@@ -13,6 +13,7 @@
 #include "base/logging.h"
 #include "core/heap_size.h"
 #include "facade/error.h"
+#include "util/fibers/proactor_base.h"
 
 using namespace std;
 using absl::StrAppend;
@@ -68,15 +69,6 @@ void SinkReplyBuilder::ResetThreadLocalStats() {
 }
 
 void SinkReplyBuilder::Send(const iovec* v, uint32_t len) {
-  int64_t before = absl::GetCurrentTimeNanos();
-  ReplyStats::SendStatsType stats_type = ReplyStats::SendStatsType::kRegular;
-
-  auto cleanup = absl::MakeCleanup([&]() {
-    int64_t after = absl::GetCurrentTimeNanos();
-    tl_facade_stats->reply_stats.send_stats[stats_type].count++;
-    tl_facade_stats->reply_stats.send_stats[stats_type].total_duration += (after - before) / 1'000;
-  });
-
   has_replied_ = true;
   DCHECK(sink_);
   constexpr size_t kMaxBatchSize = 1024;
@@ -88,8 +80,6 @@ void SinkReplyBuilder::Send(const iovec* v, uint32_t len) {
 
   // Allow batching with up to kMaxBatchSize of data.
   if ((should_batch_ || should_aggregate_) && (batch_.size() + bsize < kMaxBatchSize)) {
-    stats_type = ReplyStats::SendStatsType::kBatch;
-
     batch_.reserve(batch_.size() + bsize);
     for (unsigned i = 0; i < len; ++i) {
       std::string_view src((char*)v[i].iov_base, v[i].iov_len);
@@ -100,6 +90,7 @@ void SinkReplyBuilder::Send(const iovec* v, uint32_t len) {
     return;
   }
 
+  int64_t before_ns = util::fb2::ProactorBase::GetMonotonicTimeNs();
   error_code ec;
   send_active_ = true;
   tl_facade_stats->reply_stats.io_write_cnt++;
@@ -121,6 +112,10 @@ void SinkReplyBuilder::Send(const iovec* v, uint32_t len) {
     batch_.clear();
   }
   send_active_ = false;
+
+  int64_t after_ns = util::fb2::ProactorBase::GetMonotonicTimeNs();
+  tl_facade_stats->reply_stats.send_stats.count++;
+  tl_facade_stats->reply_stats.send_stats.total_duration += (after_ns - before_ns) / 1'000;
   if (ec) {
     DVLOG(1) << "Error writing to stream: " << ec.message();
     ec_ = ec;

@@ -215,7 +215,6 @@ using namespace util;
 using detail::SaveStagesController;
 using http::StringResponse;
 using strings::HumanReadableNumBytes;
-using SendStatsType = facade::ReplyStats::SendStatsType;
 
 namespace {
 
@@ -1051,35 +1050,11 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
   AppendMetricWithoutLabels("net_output_bytes_total", "", m.facade_stats.reply_stats.io_write_bytes,
                             MetricType::COUNTER, &resp->body());
   {
-    string send_latency_metrics;
-    constexpr string_view kReplyLatency = "reply_duration_seconds";
-    AppendMetricHeader(kReplyLatency, "Reply latency per type", MetricType::COUNTER,
-                       &send_latency_metrics);
-
-    string send_count_metrics;
-    constexpr string_view kReplyCount = "reply_total";
-    AppendMetricHeader(kReplyCount, "Reply count per type", MetricType::COUNTER,
-                       &send_count_metrics);
-
-    for (unsigned i = 0; i < SendStatsType::kNumTypes; ++i) {
-      auto& stats = m.facade_stats.reply_stats.send_stats[i];
-      string_view type;
-      switch (SendStatsType(i)) {
-        case SendStatsType::kRegular:
-          type = "regular";
-          break;
-        case SendStatsType::kBatch:
-          type = "batch";
-          break;
-        case SendStatsType::kNumTypes:
-          type = "other";
-          break;
-      }
-
-      AppendMetricValue(kReplyLatency, double(stats.total_duration) * 1e-6, {"type"}, {type},
-                        &send_latency_metrics);
-      AppendMetricValue(kReplyCount, stats.count, {"type"}, {type}, &send_count_metrics);
-    }
+    AppendMetricWithoutLabels("reply_duration_seconds", "",
+                              m.facade_stats.reply_stats.send_stats.total_duration * 1e-6,
+                              MetricType::COUNTER, &resp->body());
+    AppendMetricWithoutLabels("reply_total", "", m.facade_stats.reply_stats.send_stats.count,
+                              MetricType::COUNTER, &resp->body());
 
     // Tiered metrics.
     if (m.disk_stats.read_total > 0) {
@@ -1089,8 +1064,6 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
                               double(m.disk_stats.read_delay_usec) * 1e-6, MetricType::COUNTER,
                               &resp->body());
     }
-    absl::StrAppend(&resp->body(), send_latency_metrics);
-    absl::StrAppend(&resp->body(), send_count_metrics);
   }
 
   // DB stats
@@ -1642,9 +1615,7 @@ void ServerFamily::ResetStat() {
     tl_facade_stats->reply_stats.io_write_bytes = 0;
     tl_facade_stats->reply_stats.io_write_cnt = 0;
-    for (auto& send_stat : tl_facade_stats->reply_stats.send_stats) {
-      send_stat = {};
-    }
+    tl_facade_stats->reply_stats.send_stats = {};
 
     service_.mutable_registry()->ResetCallStats(index);
   });
 
@@ -1878,11 +1849,8 @@ void ServerFamily::Info(CmdArgList args, ConnectionContext* cntx) {
     append("defrag_attempt_total", m.shard_stats.defrag_attempt_total);
     append("defrag_realloc_total", m.shard_stats.defrag_realloc_total);
     append("defrag_task_invocation_total", m.shard_stats.defrag_task_invocation_total);
-    append("reply_count", reply_stats.send_stats[SendStatsType::kRegular].count);
-    append("reply_latency_usec", reply_stats.send_stats[SendStatsType::kRegular].total_duration);
-    append("reply_batch_count", reply_stats.send_stats[SendStatsType::kBatch].count);
-    append("reply_batch_latency_usec",
-           reply_stats.send_stats[SendStatsType::kBatch].total_duration);
+    append("reply_count", reply_stats.send_stats.count);
+    append("reply_latency_usec", reply_stats.send_stats.total_duration);
   }
 
   if (should_enter("TIERED", true)) {