
Introduce SmallString as another option for CompactObject

Roman Gershman 2022-02-24 15:22:59 +02:00
parent a93940913b
commit 737c5fed71
16 changed files with 435 additions and 43 deletions

.gitorderfile Normal file (11 lines)

@@ -0,0 +1,11 @@
*.py
*.md
*.in
*.txt
*.sh
*.yml
*.h
*.cc
*.lua
*.go
*

core/CMakeLists.txt

@@ -1,5 +1,5 @@
 add_library(dfly_core compact_object.cc dragonfly_core.cc interpreter.cc
-            tx_queue.cc)
+            segment_allocator.cc small_string.cc tx_queue.cc)
 cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua
          Boost::fiber crypto)

core/compact_object.cc

@@ -34,10 +34,17 @@ size_t QlUsedSize(quicklist* ql) {
   return res;
 }
 
-thread_local robj tmp_robj{
-    .type = 0, .encoding = 0, .lru = 0, .refcount = OBJ_STATIC_REFCOUNT, .ptr = nullptr};
-thread_local pmr::memory_resource* local_mr = pmr::get_default_resource();
+struct TL {
+  robj tmp_robj{
+      .type = 0, .encoding = 0, .lru = 0, .refcount = OBJ_STATIC_REFCOUNT, .ptr = nullptr};
+  pmr::memory_resource* local_mr = pmr::get_default_resource();
+  size_t small_str_bytes;
+};
+
+thread_local TL tl;
+
+constexpr bool kUseSmallStrings = true;
 
 }  // namespace
@@ -45,8 +52,7 @@ static_assert(sizeof(CompactObj) == 18);
 
 namespace detail {
 
-CompactBlob::CompactBlob(string_view s, pmr::memory_resource* mr)
-    : ptr_(nullptr), sz(s.size()) {
+CompactBlob::CompactBlob(string_view s, pmr::memory_resource* mr) : ptr_(nullptr), sz(s.size()) {
   if (sz) {
     ptr_ = mr->allocate(sz);
     memcpy(ptr_, s.data(), s.size());
@@ -203,8 +209,16 @@ bool RobjWrapper::Equal(std::string_view sv) const {
 
 using namespace std;
 
+auto CompactObj::GetStats() -> Stats {
+  Stats res;
+  res.small_string_bytes = tl.small_str_bytes;
+
+  return res;
+}
+
 void CompactObj::InitThreadLocal(pmr::memory_resource* mr) {
-  local_mr = mr;
+  tl.local_mr = mr;
+  SmallString::InitThreadLocal();
 }
 
 CompactObj::~CompactObj() {
@@ -229,6 +243,10 @@ size_t CompactObj::StrSize() const {
     return taglen_;
   }
 
+  if (taglen_ == SMALL_TAG) {
+    return u_.small_str.size();
+  }
+
   if (taglen_ == ROBJ_TAG) {
     return u_.r_obj.Size();
   }
@@ -243,6 +261,8 @@ uint64_t CompactObj::HashCode() const {
   }
 
   switch (taglen_) {
+    case SMALL_TAG:
+      return u_.small_str.HashCode();
     case ROBJ_TAG:
      return u_.r_obj.HashCode();
     case INT_TAG: {
@@ -259,7 +279,7 @@ uint64_t CompactObj::HashCode(std::string_view str) {
   return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed);
 }
 
 unsigned CompactObj::ObjType() const {
-  if (IsInline() || taglen_ == INT_TAG)
+  if (IsInline() || taglen_ == INT_TAG || taglen_ == SMALL_TAG)
     return OBJ_STRING;
 
   if (taglen_ == ROBJ_TAG)
@@ -301,7 +321,7 @@ void CompactObj::ImportRObj(robj* o) {
   if (o->type == OBJ_STRING) {
     std::string_view src((char*)o->ptr, sdslen((sds)o->ptr));
-    u_.r_obj.blob.Assign(src, local_mr);
+    u_.r_obj.blob.Assign(src, tl.local_mr);
     decrRefCount(o);
   } else {  // Non-string objects we move as is and release Robj wrapper.
     u_.r_obj.blob.Set(o->ptr, 0);
@@ -313,20 +333,24 @@ void CompactObj::ImportRObj(robj* o) {
 robj* CompactObj::AsRObj() const {
   CHECK_EQ(ROBJ_TAG, taglen_);
 
-  tmp_robj.encoding = u_.r_obj.encoding;
-  tmp_robj.type = u_.r_obj.type;
-  tmp_robj.lru = u_.r_obj.unneeded;
-  tmp_robj.ptr = u_.r_obj.blob.ptr();
+  robj* res = &tl.tmp_robj;
+  res->encoding = u_.r_obj.encoding;
+  res->type = u_.r_obj.type;
+  res->lru = u_.r_obj.unneeded;
+  res->ptr = u_.r_obj.blob.ptr();
 
-  return &tmp_robj;
+  return res;
 }
 
 void CompactObj::SyncRObj() {
   CHECK_EQ(ROBJ_TAG, taglen_);
 
-  CHECK_EQ(u_.r_obj.type, tmp_robj.type);
-  u_.r_obj.encoding = tmp_robj.encoding;
-  u_.r_obj.blob.Set(tmp_robj.ptr, 0);
+  robj* obj = &tl.tmp_robj;
+
+  CHECK_EQ(u_.r_obj.type, obj->type);
+  u_.r_obj.encoding = obj->encoding;
+  u_.r_obj.blob.Set(obj->ptr, 0);
 }
 
 void CompactObj::SetInt(int64_t val) {
@@ -373,6 +397,14 @@ void CompactObj::SetString(std::string_view str) {
     return;
   }
 
+  if (kUseSmallStrings && taglen_ == 0 && str.size() < (1 << 15)) {
+    u_.small_str.Reset();
+    SetMeta(SMALL_TAG, 0);
+    u_.small_str.Assign(str);
+    tl.small_str_bytes += u_.small_str.MallocUsed();
+    return;
+  }
+
   if (taglen_ != ROBJ_TAG || u_.r_obj.type != OBJ_STRING) {
     SetMeta(ROBJ_TAG);
     u_.r_obj.type = OBJ_STRING;
@@ -381,7 +413,7 @@ void CompactObj::SetString(std::string_view str) {
   DCHECK(taglen_ == ROBJ_TAG && u_.r_obj.type == OBJ_STRING);
   CHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding);
 
-  u_.r_obj.blob.Assign(input, local_mr);
+  u_.r_obj.blob.Assign(input, tl.local_mr);
 }
 
 std::string_view CompactObj::GetSlice(std::string* scratch) const {
@@ -395,6 +427,11 @@ std::string_view CompactObj::GetSlice(std::string* scratch) const {
     return u_.r_obj.blob.AsView();
   }
 
+  if (taglen_ == SMALL_TAG) {
+    u_.small_str.Get(scratch);
+    return *scratch;
+  }
+
   if (taglen_ == INT_TAG) {
     absl::AlphaNum an(u_.ival);
     scratch->assign(an.Piece());
@@ -412,7 +449,7 @@ bool CompactObj::HasAllocated() const {
       (taglen_ == ROBJ_TAG && u_.r_obj.blob.ptr() == nullptr))
     return false;
 
-  DCHECK(taglen_ == ROBJ_TAG);
+  DCHECK(taglen_ == ROBJ_TAG || taglen_ == SMALL_TAG);
   return true;
 }
@@ -436,7 +473,10 @@ void CompactObj::Free() {
   DCHECK(HasAllocated());
 
   if (taglen_ == ROBJ_TAG) {
-    u_.r_obj.Free(local_mr);
+    u_.r_obj.Free(tl.local_mr);
+  } else if (taglen_ == SMALL_TAG) {
+    tl.small_str_bytes -= u_.small_str.MallocUsed();
+    u_.small_str.Free();
   } else {
     LOG(FATAL) << "Bad compact object type " << int(taglen_);
   }
@@ -452,6 +492,10 @@ size_t CompactObj::MallocUsed() const {
     return u_.r_obj.MallocUsed();
   }
 
+  if (taglen_ == SMALL_TAG) {
+    return u_.small_str.MallocUsed();
+  }
+
   LOG(FATAL) << "TBD";
   return 0;
 }
@@ -460,19 +504,22 @@ bool CompactObj::operator==(const CompactObj& o) const {
   if (taglen_ == ROBJ_TAG || o.taglen_ == ROBJ_TAG) {
     if (o.taglen_ != taglen_)
       return false;
 
     return u_.r_obj.Equal(o.u_.r_obj);
   }
 
   if (taglen_ != o.taglen_)
     return false;
 
   if (taglen_ == INT_TAG)
     return u_.ival == o.u_.ival;
 
+  if (taglen_ == SMALL_TAG)
+    return u_.small_str.Equal(o.u_.small_str);
+
   DCHECK(IsInline() && o.IsInline());
-  if (memcmp(u_.inline_str, o.u_.inline_str, taglen_) != 0)
-    return false;
-
-  return true;
+  return memcmp(u_.inline_str, o.u_.inline_str, taglen_) == 0;
 }
 
 bool CompactObj::EqualNonInline(std::string_view sv) const {
@@ -483,6 +530,8 @@ bool CompactObj::EqualNonInline(std::string_view sv) const {
     }
     case ROBJ_TAG:
       return u_.r_obj.Equal(sv);
+    case SMALL_TAG:
+      return u_.small_str.Equal(sv);
     default:
       break;
   }
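Taken together, the compact_object.cc changes above leave CompactObj with three string representations. Below is a minimal sketch (not part of the commit; StrEncoding and PickEncoding are illustrative names) of the selection order SetString now follows for a fresh object, assuming a 16-byte inline buffer as implied by sizeof(CompactObj) == 18 and ignoring the integer fast path:

#include <cstddef>

enum class StrEncoding { kInline, kSmall, kRobj };

// Which representation a freshly built CompactObj picks for a string of length len.
StrEncoding PickEncoding(size_t len) {
  if (len <= 16)
    return StrEncoding::kInline;  // bytes live directly in the union, length stored in taglen_
  if (len < (1u << 15))
    return StrEncoding::kSmall;   // SMALL_TAG: 10-byte prefix inline + 4-byte segment pointer
  return StrEncoding::kRobj;      // ROBJ_TAG: classic robj/sds blob allocated via the memory resource
}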

core/compact_object.h

@@ -9,6 +9,8 @@
 #include <memory_resource>
 #include <optional>
 
+#include "core/small_string.h"
+
 typedef struct redisObject robj;
 typedef struct quicklist quicklist;
@@ -85,7 +87,7 @@ class CompactObj {
   // 0-16 is reserved for inline lengths of string type.
   enum TagEnum {
     INT_TAG = 17,
-    SMALL_TAG = 18,  // TBD
+    SMALL_TAG = 18,
     ROBJ_TAG = 19,
   };
@@ -214,6 +216,13 @@ class CompactObj {
     return kInlineLen;
   }
 
+  struct Stats {
+    size_t small_string_bytes = 0;
+  };
+
+  static Stats GetStats();
+
   static void InitThreadLocal(std::pmr::memory_resource* mr);
 
  private:
@@ -241,6 +250,7 @@ class CompactObj {
   union U {
     char inline_str[kInlineLen];
 
+    SmallString small_str;
     detail::RobjWrapper r_obj;
 
     int64_t ival __attribute__((packed));

core/compact_object_test.cc

@@ -27,6 +27,7 @@ class CompactObjectTest : public ::testing::Test {
  protected:
   static void SetUpTestCase() {
     init_zmalloc_threadlocal();
+    CompactObj::InitThreadLocal(pmr::get_default_resource());
   }
 
   CompactObj cs_;
@@ -53,11 +54,12 @@ TEST_F(CompactObjectTest, Basic) {
 
 TEST_F(CompactObjectTest, NonInline) {
   string s(22, 'a');
-  CompactObj a{s};
+  CompactObj obj{s};
   XXH64_hash_t seed = 24061983;
   uint64_t expected_val = XXH3_64bits_withSeed(s.data(), s.size(), seed);
 
   EXPECT_EQ(18261733907982517826UL, expected_val);
-  EXPECT_EQ(expected_val, a.HashCode());
+  EXPECT_EQ(expected_val, obj.HashCode());
+  EXPECT_EQ(s, obj);
 }
 
 TEST_F(CompactObjectTest, Int) {
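With kUseSmallStrings enabled, the 22-character string in NonInline above already takes the SMALL_TAG path, which is what the new EXPECT_EQ(s, obj) line exercises. A follow-up test along these lines (hypothetical, not part of the commit) would pin the behaviour down more directly, assuming the CompactObj string constructor routes through SetString and that the one-shot HashCode(string_view) matches the streaming hash used by SmallString:

TEST_F(CompactObjectTest, SmallStringRoundTrip) {
  string s(1000, 'b');  // above the 16-byte inline limit, below the 32KB SMALL_TAG cap
  CompactObj obj{s};

  EXPECT_EQ(s.size(), obj.StrSize());
  EXPECT_EQ(CompactObj::HashCode(s), obj.HashCode());

  string scratch;
  EXPECT_EQ(s, obj.GetSlice(&scratch));  // Get() copies the prefix and the tail back into scratch
}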

core/core_types.h Normal file (13 lines)

@@ -0,0 +1,13 @@
// Copyright 2022, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#include <absl/types/span.h>
namespace dfly {
using MutableSlice = absl::Span<char>;
using MutSliceSpan = absl::Span<MutableSlice>;
} // namespace dfly

core/interpreter.cc

@@ -146,7 +146,7 @@ optional<int> FetchKey(lua_State* lua, const char* key) {
   return type;
 }
 
-void SetGlobalArrayInternal(lua_State* lua, const char* name, Interpreter::MutSliceSpan args) {
+void SetGlobalArrayInternal(lua_State* lua, const char* name, MutSliceSpan args) {
   lua_newtable(lua);
   for (size_t j = 0; j < args.size(); j++) {
     lua_pushlstring(lua, args[j].data(), args[j].size());

core/interpreter.h

@@ -4,12 +4,12 @@
 
 #pragma once
 
-#include <absl/types/span.h>
 #include <boost/fiber/mutex.hpp>
 
 #include <functional>
 #include <string_view>
 
+#include "core/core_types.h"
+
 typedef struct lua_State lua_State;
 
 namespace dfly {
@@ -32,8 +32,6 @@ class ObjectExplorer {
 
 class Interpreter {
  public:
-  using MutableSlice = absl::Span<char>;
-  using MutSliceSpan = absl::Span<MutableSlice>;
   using RedisFunc = std::function<void(MutSliceSpan, ObjectExplorer*)>;
 
   Interpreter();

core/interpreter_test.cc

@@ -86,11 +86,11 @@ class InterpreterTest : public ::testing::Test {
 };
 
 void InterpreterTest::SetGlobalArray(const char* name, vector<string> vec) {
-  vector<Interpreter::MutableSlice> slices(vec.size());
+  vector<MutableSlice> slices(vec.size());
   for (size_t i = 0; i < vec.size(); ++i) {
-    slices[i] = Interpreter::MutableSlice{vec[i]};
+    slices[i] = MutableSlice{vec[i]};
   }
 
-  intptr_.SetGlobalArray(name, Interpreter::MutSliceSpan{slices});
+  intptr_.SetGlobalArray(name, MutSliceSpan{slices});
 }
 
 bool InterpreterTest::Execute(string_view script) {
@@ -239,7 +239,7 @@ TEST_F(InterpreterTest, Execute) {
 }
 
 TEST_F(InterpreterTest, Call) {
-  auto cb = [](Interpreter::MutSliceSpan span, ObjectExplorer* reply) {
+  auto cb = [](MutSliceSpan span, ObjectExplorer* reply) {
     CHECK_GE(span.size(), 1u);
     string_view cmd{span[0].data(), span[0].size()};
 
     if (cmd == "string") {
@@ -275,7 +275,7 @@ TEST_F(InterpreterTest, Call) {
 }
 
 TEST_F(InterpreterTest, CallArray) {
-  auto cb = [](Interpreter::MutSliceSpan span, ObjectExplorer* reply) {
+  auto cb = [](MutSliceSpan span, ObjectExplorer* reply) {
     reply->OnArrayStart(2);
     reply->OnArrayStart(1);
     reply->OnArrayStart(2);
@@ -294,7 +294,7 @@ TEST_F(InterpreterTest, CallArray) {
 
 TEST_F(InterpreterTest, ArgKeys) {
   vector<string> vec_arr{};
-  vector<Interpreter::MutableSlice> slices;
+  vector<MutableSlice> slices;
 
   SetGlobalArray("ARGV", {"foo", "bar"});
   SetGlobalArray("KEYS", {"key1", "key2"});
   EXPECT_TRUE(Execute("return {ARGV[1], KEYS[1], KEYS[2]}"));

core/segment_allocator.cc Normal file (22 lines)

@@ -0,0 +1,22 @@
// Copyright 2022, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//
#include "core/segment_allocator.h"
#include "base/logging.h"
namespace dfly {
SegmentAllocator::SegmentAllocator(mi_heap_t* heap) : heap_(heap) {
}
void SegmentAllocator::ValidateMapSize() {
CHECK_LT(address_table_.size(), 1u << 12)
<< "TODO: to monitor address_table_ map, it should not grow to such sizes";
// TODO: we should learn how large these maps can grow for very large databases.
// We should also learn whether mimalloc drops (deallocates) segments, and whether we need to
// perform GC to protect ourselves from a bloated address table.
}
} // namespace dfly

core/segment_allocator.h Normal file (76 lines)

@@ -0,0 +1,76 @@
// Copyright 2022, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#include <absl/container/flat_hash_map.h>
#include <mimalloc.h>
/***
* This class is tightly coupled with mimalloc segment allocation logic and is designed to provide
* a compact pointer representation (4-byte ptr) over a 64-bit address space that gives you
* 32GB of allocations, with an option to extend it to 32*256GB if needed.
*
*/
namespace dfly {
/**
* @brief Tightly coupled with mi_malloc 2.x implementation.
* Fetches 8MB segment pointers from the allocated pointers.
* Provides its own indexing of small pointers into the real address space using the segment ptrs.
*/
class SegmentAllocator {
static constexpr uint32_t kSegmentIdBits = 12;
static constexpr uint32_t kSegmentIdMask = (1 << kSegmentIdBits) - 1;
static constexpr uint64_t kSegmentAlignMask = ~((1 << 23) - 1);
public:
using Ptr = uint32_t;
SegmentAllocator(mi_heap_t* heap);
uint8_t* Translate(Ptr p) const {
return address_table_[p & kSegmentIdMask] + Offset(p);
}
std::pair<Ptr, uint8_t*> Allocate(uint32_t size);
void Free(Ptr ptr) {
mi_free(Translate(ptr));
}
mi_heap_t* heap() {
return heap_;
}
private:
static uint32_t Offset(Ptr p) {
return (p >> kSegmentIdBits) * 8;
}
void ValidateMapSize();
std::vector<uint8_t*> address_table_;
absl::flat_hash_map<uint64_t, uint16_t> rev_indx_;
mi_heap_t* heap_;
};
inline auto SegmentAllocator::Allocate(uint32_t size) -> std::pair<Ptr, uint8_t*> {
uint64_t ptr = (uint64_t)mi_heap_malloc(heap_, size);
uint64_t seg_ptr = ptr & kSegmentAlignMask;
// could be sped up by caching the last used seg_ptr.
auto [it, inserted] = rev_indx_.emplace(seg_ptr, address_table_.size());
if (inserted) {
ValidateMapSize();
address_table_.push_back((uint8_t*)seg_ptr);
}
Ptr res = (((ptr - seg_ptr) / 8) << kSegmentIdBits) | it->second;
return std::make_pair(res, (uint8_t*)ptr);
}
} // namespace dfly
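The arithmetic behind the 4-byte Ptr: the low kSegmentIdBits (12) bits index address_table_, so at most 4096 live 8MB-aligned segments can be referenced, and the remaining 20 bits hold the offset inside the segment in 8-byte units, which spans the full 8MB; 4096 * 8MB gives the 32GB mentioned in the header comment. A small standalone illustration (MakePtr is a hypothetical helper, not part of the class):

#include <cassert>
#include <cstdint>

// Builds a Ptr the same way Allocate() does: segment index in the low 12 bits,
// offset in 8-byte units in the upper 20 bits. Translate() reverses this via
// address_table_[p & kSegmentIdMask] + (p >> kSegmentIdBits) * 8.
uint32_t MakePtr(uint16_t segment_index, uint32_t offset_bytes) {
  assert(segment_index < (1u << 12));  // kSegmentIdBits
  assert(offset_bytes < (1u << 23));   // stays inside one 8MB segment
  assert(offset_bytes % 8 == 0);       // the 3 ignored lsb bits assume 8-byte alignment
  return ((offset_bytes / 8) << 12) | segment_index;
}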

core/small_string.cc Normal file (144 lines)

@@ -0,0 +1,144 @@
// Copyright 2022, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//
#include "core/small_string.h"
#include <xxhash.h>
#include <memory>
#include "base/logging.h"
#include "core/segment_allocator.h"
namespace dfly {
using namespace std;
namespace {
class XXH3_Deleter {
public:
void operator()(XXH3_state_t* ptr) const {
XXH3_freeState(ptr);
}
};
struct TL {
unique_ptr<XXH3_state_t, XXH3_Deleter> xxh_state;
unique_ptr<SegmentAllocator> seg_alloc;
};
thread_local TL tl;
constexpr XXH64_hash_t kHashSeed = 24061983; // same as in compact_object.cc
} // namespace
void SmallString::InitThreadLocal() {
SegmentAllocator* ns = new SegmentAllocator(mi_heap_get_backing());
tl.seg_alloc.reset(ns);
tl.xxh_state.reset(XXH3_createState());
XXH3_64bits_reset_withSeed(tl.xxh_state.get(), kHashSeed);
}
static_assert(sizeof(SmallString) == 16);
// Should be used only for sizes greater than kPrefLen.
void SmallString::Assign(std::string_view s) {
DCHECK_GT(s.size(), kPrefLen);
uint8_t* realptr = nullptr;
if (size_ == 0) {
auto [sp, rp] = tl.seg_alloc->Allocate(s.size() - kPrefLen);
small_ptr_ = sp;
realptr = rp;
size_ = s.size();
} else if (size_ == s.size()) {
realptr = tl.seg_alloc->Translate(small_ptr_);
} else {
LOG(FATAL) << "TBD: Bad usage";
}
memcpy(prefix_, s.data(), kPrefLen);
memcpy(realptr, s.data() + kPrefLen, s.size() - kPrefLen);
}
void SmallString::Free() {
if (size_ <= kPrefLen)
return;
tl.seg_alloc->Free(small_ptr_);
size_ = 0;
}
bool SmallString::Equal(std::string_view o) const {
if (size_ != o.size())
return false;
if (size_ == 0)
return true;
DCHECK_GT(size_, kPrefLen);
if (memcmp(prefix_, o.data(), kPrefLen) != 0)
return false;
uint8_t* realp = tl.seg_alloc->Translate(small_ptr_);
return memcmp(realp, o.data() + kPrefLen, size_ - kPrefLen) == 0;
}
bool SmallString::Equal(const SmallString& os) const {
if (size_ != os.size_)
return false;
string_view me[2], other[2];
unsigned n1 = GetV(me);
unsigned n2 = os.GetV(other);
if (n1 != n2)
return false;
return me[0] == other[0] && me[1] == other[1];
}
uint64_t SmallString::HashCode() const {
DCHECK_GT(size_, kPrefLen);
string_view slice[2];
GetV(slice);
XXH3_state_t* state = tl.xxh_state.get();
XXH3_64bits_reset_withSeed(state, kHashSeed);
XXH3_64bits_update(state, slice[0].data(), slice[0].size());
XXH3_64bits_update(state, slice[1].data(), slice[1].size());
return XXH3_64bits_digest(state);
}
void SmallString::Get(std::string* dest) const {
dest->resize(size_);
if (size_) {
DCHECK_GT(size_, kPrefLen);
memcpy(dest->data(), prefix_, kPrefLen);
uint8_t* ptr = tl.seg_alloc->Translate(small_ptr_);
memcpy(dest->data() + kPrefLen, ptr, size_ - kPrefLen);
}
}
unsigned SmallString::GetV(string_view dest[2]) const {
if (size_ <= kPrefLen) {
dest[0] = string_view{prefix_, size_};
return 1;
}
dest[0] = string_view{prefix_, kPrefLen};
uint8_t* ptr = tl.seg_alloc->Translate(small_ptr_);
dest[1] = string_view{reinterpret_cast<char*>(ptr), size_ - kPrefLen};
return 2;
}
} // namespace dfly
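HashCode() above leans on a property of XXH3: the streaming digest over the prefix and tail slices equals the one-shot XXH3_64bits_withSeed over the concatenated bytes with the same seed (24061983, shared with compact_object.cc), so a key stored as a SmallString hashes identically to the same bytes stored under any other encoding. A standalone check of that assumption (illustration only, not part of the commit):

#include <xxhash.h>

#include <cassert>
#include <cstdint>
#include <string_view>

uint64_t OneShot(std::string_view s, uint64_t seed) {
  return XXH3_64bits_withSeed(s.data(), s.size(), seed);
}

uint64_t TwoSlices(std::string_view prefix, std::string_view tail, uint64_t seed) {
  XXH3_state_t* st = XXH3_createState();
  XXH3_64bits_reset_withSeed(st, seed);
  XXH3_64bits_update(st, prefix.data(), prefix.size());
  XXH3_64bits_update(st, tail.data(), tail.size());
  uint64_t res = XXH3_64bits_digest(st);
  XXH3_freeState(st);
  return res;
}

int main() {
  std::string_view s = "a string longer than the ten byte prefix";
  assert(OneShot(s, 24061983) == TwoSlices(s.substr(0, 10), s.substr(10), 24061983));
  return 0;
}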

core/small_string.h Normal file (60 lines)

@@ -0,0 +1,60 @@
// Copyright 2022, Roman Gershman. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#include <string_view>
#include "core/core_types.h"
namespace dfly {
// Blob strings of up to ~64KB. Small sizes are probably predominant
// for in-memory workloads, especially for keys.
// Please note that this class does not have automatic constructors and destructors, therefore
// it requires explicit management.
class SmallString {
static constexpr unsigned kPrefLen = 10;
public:
static void InitThreadLocal();
void Reset() {
size_ = 0;
}
void Assign(std::string_view s);
void Free();
bool Equal(std::string_view o) const;
bool Equal(const SmallString& mps) const;
uint16_t size() const {
return size_;
}
uint64_t HashCode() const;
// I am lying here. we should use mi_malloc_usable size really.
uint16_t MallocUsed() const {
return size_ >= kPrefLen + 8 ? size_ - kPrefLen : 8;
}
void Get(std::string* dest) const;
// returns 1 or 2 slices representing this small string.
// Guarantees zero copy, i.e. dest will not point to any external buffers.
// With current implementation, it will return 2 slices for a non-empty string.
unsigned GetV(std::string_view dest[2]) const;
private:
// prefix of the string that is broken down into 2 parts.
char prefix_[kPrefLen];
uint32_t small_ptr_; // 32GB capacity because we ignore 3 lsb bits (i.e. x8).
uint16_t size_; // uint16_t - total size (including prefix)
} __attribute__((packed));
} // namespace dfly

server/db_slice.cc

@@ -25,6 +25,8 @@ using namespace util;
 #define ADD(x) (x) += o.x
 
 DbStats& DbStats::operator+=(const DbStats& o) {
+  static_assert(sizeof(DbStats) == 56);
+
   ADD(key_count);
   ADD(expire_count);
   ADD(bucket_count);
@@ -32,6 +34,7 @@ DbStats& DbStats::operator+=(const DbStats& o) {
   ADD(obj_memory_usage);
   ADD(table_mem_usage);
+  ADD(small_string_bytes);
 
   return *this;
 }
@@ -84,6 +87,7 @@ auto DbSlice::GetStats() const -> Stats {
     s.db.inline_keys += db->stats.inline_keys;
     s.db.table_mem_usage += (db->prime_table.mem_usage() + db->expire_table.mem_usage());
   }
+  s.db.small_string_bytes = CompactObj::GetStats().small_string_bytes;
 
   return s;
 }
@@ -318,10 +322,10 @@ pair<MainIterator, bool> DbSlice::AddIfNotExist(DbIndex db_ind, string_view key,
                                                 uint64_t expire_at_ms) {
   DCHECK(!obj.IsRef());
 
-  auto& db = db_arr_[db_ind];
+  auto& db = *db_arr_[db_ind];
 
   CompactObj co_key{key};
-  auto [new_entry, inserted] = db->prime_table.Insert(std::move(co_key), std::move(obj));
+  auto [new_entry, inserted] = db.prime_table.Insert(std::move(co_key), std::move(obj));
 
   // in this case obj won't be moved and will be destroyed during unwinding.
   if (!inserted)
@@ -329,13 +333,13 @@ pair<MainIterator, bool> DbSlice::AddIfNotExist(DbIndex db_ind, string_view key,
 
   new_entry.SetVersion(NextVersion());
 
-  db->stats.inline_keys += new_entry->first.IsInline();
-  db->stats.obj_memory_usage += (new_entry->first.MallocUsed() + new_entry->second.MallocUsed());
+  db.stats.inline_keys += new_entry->first.IsInline();
+  db.stats.obj_memory_usage += (new_entry->first.MallocUsed() + new_entry->second.MallocUsed());
 
   if (expire_at_ms) {
     new_entry->second.SetExpire(true);
-    CHECK(db->expire_table.Insert(new_entry->first.AsRef(), expire_at_ms).second);
+    CHECK(db.expire_table.Insert(new_entry->first.AsRef(), expire_at_ms).second);
   }
 
   return make_pair(new_entry, true);

server/db_slice.h

@@ -38,6 +38,8 @@ struct DbStats {
   // Memory used by dictionaries.
   size_t table_mem_usage = 0;
 
+  size_t small_string_bytes = 0;
+
   DbStats& operator+=(const DbStats& o);
 };

server/server_family.cc

@@ -395,6 +395,7 @@ tcp_port:)";
     absl::StrAppend(&info, "table_used_memory:", m.db.table_mem_usage, "\n");
     absl::StrAppend(&info, "num_entries:", m.db.key_count, "\n");
     absl::StrAppend(&info, "inline_keys:", m.db.inline_keys, "\n");
+    absl::StrAppend(&info, "small_string_bytes:", m.db.small_string_bytes, "\n");
   }
 
   if (should_enter("STATS")) {