mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2024-12-14 11:58:02 +00:00
Introduce SmallString as another option for CompactObject
This commit is contained in:
parent
a93940913b
commit
737c5fed71
16 changed files with 435 additions and 43 deletions
11
.gitorderfile
Normal file
11
.gitorderfile
Normal file
|
@ -0,0 +1,11 @@
|
|||
*.py
|
||||
*.md
|
||||
*.in
|
||||
*.txt
|
||||
*.sh
|
||||
*.yml
|
||||
*.h
|
||||
*.cc
|
||||
*.lua
|
||||
*.go
|
||||
*
|
|
@ -1,5 +1,5 @@
|
|||
add_library(dfly_core compact_object.cc dragonfly_core.cc interpreter.cc
|
||||
tx_queue.cc)
|
||||
segment_allocator.cc small_string.cc tx_queue.cc)
|
||||
cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua
|
||||
Boost::fiber crypto)
|
||||
|
||||
|
|
|
@ -34,10 +34,17 @@ size_t QlUsedSize(quicklist* ql) {
|
|||
return res;
|
||||
}
|
||||
|
||||
thread_local robj tmp_robj{
|
||||
struct TL {
|
||||
robj tmp_robj{
|
||||
.type = 0, .encoding = 0, .lru = 0, .refcount = OBJ_STATIC_REFCOUNT, .ptr = nullptr};
|
||||
|
||||
thread_local pmr::memory_resource* local_mr = pmr::get_default_resource();
|
||||
pmr::memory_resource* local_mr = pmr::get_default_resource();
|
||||
size_t small_str_bytes;
|
||||
};
|
||||
|
||||
thread_local TL tl;
|
||||
|
||||
constexpr bool kUseSmallStrings = true;
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -45,8 +52,7 @@ static_assert(sizeof(CompactObj) == 18);
|
|||
|
||||
namespace detail {
|
||||
|
||||
CompactBlob::CompactBlob(string_view s, pmr::memory_resource* mr)
|
||||
: ptr_(nullptr), sz(s.size()) {
|
||||
CompactBlob::CompactBlob(string_view s, pmr::memory_resource* mr) : ptr_(nullptr), sz(s.size()) {
|
||||
if (sz) {
|
||||
ptr_ = mr->allocate(sz);
|
||||
memcpy(ptr_, s.data(), s.size());
|
||||
|
@ -203,8 +209,16 @@ bool RobjWrapper::Equal(std::string_view sv) const {
|
|||
|
||||
using namespace std;
|
||||
|
||||
auto CompactObj::GetStats() -> Stats {
|
||||
Stats res;
|
||||
res.small_string_bytes = tl.small_str_bytes;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void CompactObj::InitThreadLocal(pmr::memory_resource* mr) {
|
||||
local_mr = mr;
|
||||
tl.local_mr = mr;
|
||||
SmallString::InitThreadLocal();
|
||||
}
|
||||
|
||||
CompactObj::~CompactObj() {
|
||||
|
@ -229,6 +243,10 @@ size_t CompactObj::StrSize() const {
|
|||
return taglen_;
|
||||
}
|
||||
|
||||
if (taglen_ == SMALL_TAG) {
|
||||
return u_.small_str.size();
|
||||
}
|
||||
|
||||
if (taglen_ == ROBJ_TAG) {
|
||||
return u_.r_obj.Size();
|
||||
}
|
||||
|
@ -243,6 +261,8 @@ uint64_t CompactObj::HashCode() const {
|
|||
}
|
||||
|
||||
switch (taglen_) {
|
||||
case SMALL_TAG:
|
||||
return u_.small_str.HashCode();
|
||||
case ROBJ_TAG:
|
||||
return u_.r_obj.HashCode();
|
||||
case INT_TAG: {
|
||||
|
@ -259,7 +279,7 @@ uint64_t CompactObj::HashCode(std::string_view str) {
|
|||
return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed);
|
||||
}
|
||||
unsigned CompactObj::ObjType() const {
|
||||
if (IsInline() || taglen_ == INT_TAG)
|
||||
if (IsInline() || taglen_ == INT_TAG || taglen_ == SMALL_TAG)
|
||||
return OBJ_STRING;
|
||||
|
||||
if (taglen_ == ROBJ_TAG)
|
||||
|
@ -301,7 +321,7 @@ void CompactObj::ImportRObj(robj* o) {
|
|||
|
||||
if (o->type == OBJ_STRING) {
|
||||
std::string_view src((char*)o->ptr, sdslen((sds)o->ptr));
|
||||
u_.r_obj.blob.Assign(src, local_mr);
|
||||
u_.r_obj.blob.Assign(src, tl.local_mr);
|
||||
decrRefCount(o);
|
||||
} else { // Non-string objects we move as is and release Robj wrapper.
|
||||
u_.r_obj.blob.Set(o->ptr, 0);
|
||||
|
@ -313,20 +333,24 @@ void CompactObj::ImportRObj(robj* o) {
|
|||
robj* CompactObj::AsRObj() const {
|
||||
CHECK_EQ(ROBJ_TAG, taglen_);
|
||||
|
||||
tmp_robj.encoding = u_.r_obj.encoding;
|
||||
tmp_robj.type = u_.r_obj.type;
|
||||
tmp_robj.lru = u_.r_obj.unneeded;
|
||||
tmp_robj.ptr = u_.r_obj.blob.ptr();
|
||||
robj* res = &tl.tmp_robj;
|
||||
res->encoding = u_.r_obj.encoding;
|
||||
res->type = u_.r_obj.type;
|
||||
res->lru = u_.r_obj.unneeded;
|
||||
res->ptr = u_.r_obj.blob.ptr();
|
||||
|
||||
return &tmp_robj;
|
||||
return res;
|
||||
}
|
||||
|
||||
void CompactObj::SyncRObj() {
|
||||
CHECK_EQ(ROBJ_TAG, taglen_);
|
||||
CHECK_EQ(u_.r_obj.type, tmp_robj.type);
|
||||
|
||||
u_.r_obj.encoding = tmp_robj.encoding;
|
||||
u_.r_obj.blob.Set(tmp_robj.ptr, 0);
|
||||
robj* obj = &tl.tmp_robj;
|
||||
|
||||
CHECK_EQ(u_.r_obj.type, obj->type);
|
||||
|
||||
u_.r_obj.encoding = obj->encoding;
|
||||
u_.r_obj.blob.Set(obj->ptr, 0);
|
||||
}
|
||||
|
||||
void CompactObj::SetInt(int64_t val) {
|
||||
|
@ -373,6 +397,14 @@ void CompactObj::SetString(std::string_view str) {
|
|||
return;
|
||||
}
|
||||
|
||||
if (kUseSmallStrings && taglen_ == 0 && str.size() < (1 << 15)) {
|
||||
u_.small_str.Reset();
|
||||
SetMeta(SMALL_TAG, 0);
|
||||
u_.small_str.Assign(str);
|
||||
tl.small_str_bytes += u_.small_str.MallocUsed();
|
||||
return;
|
||||
}
|
||||
|
||||
if (taglen_ != ROBJ_TAG || u_.r_obj.type != OBJ_STRING) {
|
||||
SetMeta(ROBJ_TAG);
|
||||
u_.r_obj.type = OBJ_STRING;
|
||||
|
@ -381,7 +413,7 @@ void CompactObj::SetString(std::string_view str) {
|
|||
|
||||
DCHECK(taglen_ == ROBJ_TAG && u_.r_obj.type == OBJ_STRING);
|
||||
CHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding);
|
||||
u_.r_obj.blob.Assign(input, local_mr);
|
||||
u_.r_obj.blob.Assign(input, tl.local_mr);
|
||||
}
|
||||
|
||||
std::string_view CompactObj::GetSlice(std::string* scratch) const {
|
||||
|
@ -395,6 +427,11 @@ std::string_view CompactObj::GetSlice(std::string* scratch) const {
|
|||
return u_.r_obj.blob.AsView();
|
||||
}
|
||||
|
||||
if (taglen_ == SMALL_TAG) {
|
||||
u_.small_str.Get(scratch);
|
||||
return *scratch;
|
||||
}
|
||||
|
||||
if (taglen_ == INT_TAG) {
|
||||
absl::AlphaNum an(u_.ival);
|
||||
scratch->assign(an.Piece());
|
||||
|
@ -412,7 +449,7 @@ bool CompactObj::HasAllocated() const {
|
|||
(taglen_ == ROBJ_TAG && u_.r_obj.blob.ptr() == nullptr))
|
||||
return false;
|
||||
|
||||
DCHECK(taglen_ == ROBJ_TAG);
|
||||
DCHECK(taglen_ == ROBJ_TAG || taglen_ == SMALL_TAG);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -436,7 +473,10 @@ void CompactObj::Free() {
|
|||
DCHECK(HasAllocated());
|
||||
|
||||
if (taglen_ == ROBJ_TAG) {
|
||||
u_.r_obj.Free(local_mr);
|
||||
u_.r_obj.Free(tl.local_mr);
|
||||
} else if (taglen_ == SMALL_TAG) {
|
||||
tl.small_str_bytes -= u_.small_str.MallocUsed();
|
||||
u_.small_str.Free();
|
||||
} else {
|
||||
LOG(FATAL) << "Bad compact object type " << int(taglen_);
|
||||
}
|
||||
|
@ -452,6 +492,10 @@ size_t CompactObj::MallocUsed() const {
|
|||
return u_.r_obj.MallocUsed();
|
||||
}
|
||||
|
||||
if (taglen_ == SMALL_TAG) {
|
||||
return u_.small_str.MallocUsed();
|
||||
}
|
||||
|
||||
LOG(FATAL) << "TBD";
|
||||
return 0;
|
||||
}
|
||||
|
@ -460,19 +504,22 @@ bool CompactObj::operator==(const CompactObj& o) const {
|
|||
if (taglen_ == ROBJ_TAG || o.taglen_ == ROBJ_TAG) {
|
||||
if (o.taglen_ != taglen_)
|
||||
return false;
|
||||
|
||||
return u_.r_obj.Equal(o.u_.r_obj);
|
||||
}
|
||||
|
||||
if (taglen_ != o.taglen_)
|
||||
return false;
|
||||
|
||||
if (taglen_ == INT_TAG)
|
||||
return u_.ival == o.u_.ival;
|
||||
|
||||
if (taglen_ == SMALL_TAG)
|
||||
return u_.small_str.Equal(o.u_.small_str);
|
||||
|
||||
DCHECK(IsInline() && o.IsInline());
|
||||
|
||||
if (memcmp(u_.inline_str, o.u_.inline_str, taglen_) != 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return memcmp(u_.inline_str, o.u_.inline_str, taglen_) == 0;
|
||||
}
|
||||
|
||||
bool CompactObj::EqualNonInline(std::string_view sv) const {
|
||||
|
@ -483,6 +530,8 @@ bool CompactObj::EqualNonInline(std::string_view sv) const {
|
|||
}
|
||||
case ROBJ_TAG:
|
||||
return u_.r_obj.Equal(sv);
|
||||
case SMALL_TAG:
|
||||
return u_.small_str.Equal(sv);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
#include <memory_resource>
|
||||
#include <optional>
|
||||
|
||||
#include "core/small_string.h"
|
||||
|
||||
typedef struct redisObject robj;
|
||||
typedef struct quicklist quicklist;
|
||||
|
||||
|
@ -85,7 +87,7 @@ class CompactObj {
|
|||
// 0-16 is reserved for inline lengths of string type.
|
||||
enum TagEnum {
|
||||
INT_TAG = 17,
|
||||
SMALL_TAG = 18, // TBD
|
||||
SMALL_TAG = 18,
|
||||
ROBJ_TAG = 19,
|
||||
};
|
||||
|
||||
|
@ -214,6 +216,13 @@ class CompactObj {
|
|||
return kInlineLen;
|
||||
}
|
||||
|
||||
|
||||
struct Stats {
|
||||
size_t small_string_bytes = 0;
|
||||
};
|
||||
|
||||
static Stats GetStats();
|
||||
|
||||
static void InitThreadLocal(std::pmr::memory_resource* mr);
|
||||
|
||||
private:
|
||||
|
@ -241,6 +250,7 @@ class CompactObj {
|
|||
union U {
|
||||
char inline_str[kInlineLen];
|
||||
|
||||
SmallString small_str;
|
||||
detail::RobjWrapper r_obj;
|
||||
int64_t ival __attribute__((packed));
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ class CompactObjectTest : public ::testing::Test {
|
|||
protected:
|
||||
static void SetUpTestCase() {
|
||||
init_zmalloc_threadlocal();
|
||||
CompactObj::InitThreadLocal(pmr::get_default_resource());
|
||||
}
|
||||
|
||||
CompactObj cs_;
|
||||
|
@ -53,11 +54,12 @@ TEST_F(CompactObjectTest, Basic) {
|
|||
|
||||
TEST_F(CompactObjectTest, NonInline) {
|
||||
string s(22, 'a');
|
||||
CompactObj a{s};
|
||||
CompactObj obj{s};
|
||||
XXH64_hash_t seed = 24061983;
|
||||
uint64_t expected_val = XXH3_64bits_withSeed(s.data(), s.size(), seed);
|
||||
EXPECT_EQ(18261733907982517826UL, expected_val);
|
||||
EXPECT_EQ(expected_val, a.HashCode());
|
||||
EXPECT_EQ(expected_val, obj.HashCode());
|
||||
EXPECT_EQ(s, obj);
|
||||
}
|
||||
|
||||
TEST_F(CompactObjectTest, Int) {
|
||||
|
|
13
core/core_types.h
Normal file
13
core/core_types.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2022, Roman Gershman. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <absl/types/span.h>
|
||||
|
||||
namespace dfly {
|
||||
using MutableSlice = absl::Span<char>;
|
||||
using MutSliceSpan = absl::Span<MutableSlice>;
|
||||
|
||||
} // namespace dfly
|
|
@ -146,7 +146,7 @@ optional<int> FetchKey(lua_State* lua, const char* key) {
|
|||
return type;
|
||||
}
|
||||
|
||||
void SetGlobalArrayInternal(lua_State* lua, const char* name, Interpreter::MutSliceSpan args) {
|
||||
void SetGlobalArrayInternal(lua_State* lua, const char* name, MutSliceSpan args) {
|
||||
lua_newtable(lua);
|
||||
for (size_t j = 0; j < args.size(); j++) {
|
||||
lua_pushlstring(lua, args[j].data(), args[j].size());
|
||||
|
|
|
@ -4,12 +4,12 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <absl/types/span.h>
|
||||
|
||||
#include <boost/fiber/mutex.hpp>
|
||||
#include <functional>
|
||||
#include <string_view>
|
||||
|
||||
#include "core/core_types.h"
|
||||
|
||||
typedef struct lua_State lua_State;
|
||||
|
||||
namespace dfly {
|
||||
|
@ -32,8 +32,6 @@ class ObjectExplorer {
|
|||
|
||||
class Interpreter {
|
||||
public:
|
||||
using MutableSlice = absl::Span<char>;
|
||||
using MutSliceSpan = absl::Span<MutableSlice>;
|
||||
using RedisFunc = std::function<void(MutSliceSpan, ObjectExplorer*)>;
|
||||
|
||||
Interpreter();
|
||||
|
|
|
@ -86,11 +86,11 @@ class InterpreterTest : public ::testing::Test {
|
|||
};
|
||||
|
||||
void InterpreterTest::SetGlobalArray(const char* name, vector<string> vec) {
|
||||
vector<Interpreter::MutableSlice> slices(vec.size());
|
||||
vector<MutableSlice> slices(vec.size());
|
||||
for (size_t i = 0; i < vec.size(); ++i) {
|
||||
slices[i] = Interpreter::MutableSlice{vec[i]};
|
||||
slices[i] = MutableSlice{vec[i]};
|
||||
}
|
||||
intptr_.SetGlobalArray(name, Interpreter::MutSliceSpan{slices});
|
||||
intptr_.SetGlobalArray(name, MutSliceSpan{slices});
|
||||
}
|
||||
|
||||
bool InterpreterTest::Execute(string_view script) {
|
||||
|
@ -239,7 +239,7 @@ TEST_F(InterpreterTest, Execute) {
|
|||
}
|
||||
|
||||
TEST_F(InterpreterTest, Call) {
|
||||
auto cb = [](Interpreter::MutSliceSpan span, ObjectExplorer* reply) {
|
||||
auto cb = [](MutSliceSpan span, ObjectExplorer* reply) {
|
||||
CHECK_GE(span.size(), 1u);
|
||||
string_view cmd{span[0].data(), span[0].size()};
|
||||
if (cmd == "string") {
|
||||
|
@ -275,7 +275,7 @@ TEST_F(InterpreterTest, Call) {
|
|||
}
|
||||
|
||||
TEST_F(InterpreterTest, CallArray) {
|
||||
auto cb = [](Interpreter::MutSliceSpan span, ObjectExplorer* reply) {
|
||||
auto cb = [](MutSliceSpan span, ObjectExplorer* reply) {
|
||||
reply->OnArrayStart(2);
|
||||
reply->OnArrayStart(1);
|
||||
reply->OnArrayStart(2);
|
||||
|
@ -294,7 +294,7 @@ TEST_F(InterpreterTest, CallArray) {
|
|||
|
||||
TEST_F(InterpreterTest, ArgKeys) {
|
||||
vector<string> vec_arr{};
|
||||
vector<Interpreter::MutableSlice> slices;
|
||||
vector<MutableSlice> slices;
|
||||
SetGlobalArray("ARGV", {"foo", "bar"});
|
||||
SetGlobalArray("KEYS", {"key1", "key2"});
|
||||
EXPECT_TRUE(Execute("return {ARGV[1], KEYS[1], KEYS[2]}"));
|
||||
|
|
22
core/segment_allocator.cc
Normal file
22
core/segment_allocator.cc
Normal file
|
@ -0,0 +1,22 @@
|
|||
// Copyright 2022, Roman Gershman. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
#include "core/segment_allocator.h"
|
||||
|
||||
#include "base/logging.h"
|
||||
|
||||
namespace dfly {
|
||||
|
||||
SegmentAllocator::SegmentAllocator(mi_heap_t* heap) : heap_(heap) {
|
||||
}
|
||||
|
||||
void SegmentAllocator::ValidateMapSize() {
|
||||
CHECK_LT(address_table_.size(), 1u << 12)
|
||||
<< "TODO: to monitor address_table_ map, it should not grow to such sizes";
|
||||
|
||||
// TODO: we should learn how large this maps can grow for very large databases.
|
||||
// We should learn if mimalloc drops (deallocates) segments and we need to perform GC
|
||||
// to protect ourselves from bloated address table.
|
||||
}
|
||||
|
||||
} // namespace dfly
|
76
core/segment_allocator.h
Normal file
76
core/segment_allocator.h
Normal file
|
@ -0,0 +1,76 @@
|
|||
// Copyright 2022, Roman Gershman. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include <absl/container/flat_hash_map.h>
|
||||
#include <mimalloc.h>
|
||||
|
||||
/***
|
||||
* This class is tightly coupled with mimalloc segment allocation logic and is designed to provide
|
||||
* a compact pointer representation (4bytes ptr) over 64bit address space that gives you
|
||||
* 32GB of allocations with option to extend it to 32*256GB if needed.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace dfly {
|
||||
|
||||
/**
|
||||
* @brief Tightly coupled with mi_malloc 2.x implementation.
|
||||
* Fetches 8MB segment pointers from the allocated pointers.
|
||||
* Provides own indexing of small pointers to real address space using the segment ptrs/
|
||||
*/
|
||||
|
||||
class SegmentAllocator {
|
||||
static constexpr uint32_t kSegmentIdBits = 12;
|
||||
static constexpr uint32_t kSegmentIdMask = (1 << kSegmentIdBits) - 1;
|
||||
static constexpr uint64_t kSegmentAlignMask = ~((1 << 23) - 1);
|
||||
|
||||
public:
|
||||
using Ptr = uint32_t;
|
||||
|
||||
SegmentAllocator(mi_heap_t* heap);
|
||||
|
||||
uint8_t* Translate(Ptr p) const {
|
||||
return address_table_[p & kSegmentIdMask] + Offset(p);
|
||||
}
|
||||
|
||||
std::pair<Ptr, uint8_t*> Allocate(uint32_t size);
|
||||
|
||||
void Free(Ptr ptr) {
|
||||
mi_free(Translate(ptr));
|
||||
}
|
||||
|
||||
mi_heap_t* heap() {
|
||||
return heap_;
|
||||
}
|
||||
|
||||
private:
|
||||
static uint32_t Offset(Ptr p) {
|
||||
return (p >> kSegmentIdBits) * 8;
|
||||
}
|
||||
|
||||
void ValidateMapSize();
|
||||
|
||||
std::vector<uint8_t*> address_table_;
|
||||
absl::flat_hash_map<uint64_t, uint16_t> rev_indx_;
|
||||
mi_heap_t* heap_;
|
||||
};
|
||||
|
||||
inline auto SegmentAllocator::Allocate(uint32_t size) -> std::pair<Ptr, uint8_t*> {
|
||||
uint64_t ptr = (uint64_t)mi_heap_malloc(heap_, size);
|
||||
uint64_t seg_ptr = ptr & kSegmentAlignMask;
|
||||
|
||||
// could be speed up using last used seg_ptr.
|
||||
auto [it, inserted] = rev_indx_.emplace(seg_ptr, address_table_.size());
|
||||
if (inserted) {
|
||||
ValidateMapSize();
|
||||
address_table_.push_back((uint8_t*)seg_ptr);
|
||||
}
|
||||
|
||||
Ptr res = (((ptr - seg_ptr) / 8) << kSegmentIdBits) | it->second;
|
||||
|
||||
return std::make_pair(res, (uint8_t*)ptr);
|
||||
}
|
||||
|
||||
} // namespace dfly
|
144
core/small_string.cc
Normal file
144
core/small_string.cc
Normal file
|
@ -0,0 +1,144 @@
|
|||
// Copyright 2022, Roman Gershman. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#include "core/small_string.h"
|
||||
|
||||
#include <xxhash.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "core/segment_allocator.h"
|
||||
|
||||
namespace dfly {
|
||||
using namespace std;
|
||||
|
||||
namespace {
|
||||
|
||||
class XXH3_Deleter {
|
||||
public:
|
||||
void operator()(XXH3_state_t* ptr) const {
|
||||
XXH3_freeState(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
struct TL {
|
||||
unique_ptr<XXH3_state_t, XXH3_Deleter> xxh_state;
|
||||
unique_ptr<SegmentAllocator> seg_alloc;
|
||||
};
|
||||
|
||||
thread_local TL tl;
|
||||
|
||||
constexpr XXH64_hash_t kHashSeed = 24061983; // same as in compact_object.cc
|
||||
|
||||
} // namespace
|
||||
|
||||
void SmallString::InitThreadLocal() {
|
||||
SegmentAllocator* ns = new SegmentAllocator(mi_heap_get_backing());
|
||||
|
||||
tl.seg_alloc.reset(ns);
|
||||
tl.xxh_state.reset(XXH3_createState());
|
||||
XXH3_64bits_reset_withSeed(tl.xxh_state.get(), kHashSeed);
|
||||
}
|
||||
|
||||
static_assert(sizeof(SmallString) == 16);
|
||||
|
||||
// we should use only for sizes greater than kPrefLen
|
||||
void SmallString::Assign(std::string_view s) {
|
||||
DCHECK_GT(s.size(), kPrefLen);
|
||||
|
||||
uint8_t* realptr = nullptr;
|
||||
|
||||
if (size_ == 0) {
|
||||
auto [sp, rp] = tl.seg_alloc->Allocate(s.size() - kPrefLen);
|
||||
small_ptr_ = sp;
|
||||
realptr = rp;
|
||||
|
||||
size_ = s.size();
|
||||
} else if (size_ == s.size()) {
|
||||
realptr = tl.seg_alloc->Translate(small_ptr_);
|
||||
} else {
|
||||
LOG(FATAL) << "TBD: Bad usage";
|
||||
}
|
||||
|
||||
memcpy(prefix_, s.data(), kPrefLen);
|
||||
memcpy(realptr, s.data() + kPrefLen, s.size() - kPrefLen);
|
||||
}
|
||||
|
||||
void SmallString::Free() {
|
||||
if (size_ <= kPrefLen)
|
||||
return;
|
||||
|
||||
tl.seg_alloc->Free(small_ptr_);
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
bool SmallString::Equal(std::string_view o) const {
|
||||
if (size_ != o.size())
|
||||
return false;
|
||||
|
||||
if (size_ == 0)
|
||||
return true;
|
||||
|
||||
DCHECK_GT(size_, kPrefLen);
|
||||
|
||||
if (memcmp(prefix_, o.data(), kPrefLen) != 0)
|
||||
return false;
|
||||
|
||||
uint8_t* realp = tl.seg_alloc->Translate(small_ptr_);
|
||||
|
||||
return memcmp(realp, o.data() + kPrefLen, size_ - kPrefLen) == 0;
|
||||
}
|
||||
|
||||
bool SmallString::Equal(const SmallString& os) const {
|
||||
if (size_ != os.size_)
|
||||
return false;
|
||||
|
||||
string_view me[2], other[2];
|
||||
unsigned n1 = GetV(me);
|
||||
unsigned n2 = os.GetV(other);
|
||||
|
||||
if (n1 != n2)
|
||||
return false;
|
||||
|
||||
return me[0] == other[0] && me[1] == other[1];
|
||||
}
|
||||
|
||||
uint64_t SmallString::HashCode() const {
|
||||
DCHECK_GT(size_, kPrefLen);
|
||||
|
||||
string_view slice[2];
|
||||
|
||||
GetV(slice);
|
||||
XXH3_state_t* state = tl.xxh_state.get();
|
||||
XXH3_64bits_reset_withSeed(state, kHashSeed);
|
||||
XXH3_64bits_update(state, slice[0].data(), slice[0].size());
|
||||
XXH3_64bits_update(state, slice[1].data(), slice[1].size());
|
||||
|
||||
return XXH3_64bits_digest(state);
|
||||
}
|
||||
|
||||
void SmallString::Get(std::string* dest) const {
|
||||
dest->resize(size_);
|
||||
if (size_) {
|
||||
DCHECK_GT(size_, kPrefLen);
|
||||
memcpy(dest->data(), prefix_, kPrefLen);
|
||||
uint8_t* ptr = tl.seg_alloc->Translate(small_ptr_);
|
||||
memcpy(dest->data() + kPrefLen, ptr, size_ - kPrefLen);
|
||||
}
|
||||
}
|
||||
|
||||
// Fills `dest` with views over this string's storage and returns how many were filled:
// one (the prefix only) when size_ <= kPrefLen, otherwise two (prefix, then remainder).
unsigned SmallString::GetV(string_view dest[2]) const {
  const bool prefix_only = size_ <= kPrefLen;
  if (prefix_only) {
    dest[0] = string_view{prefix_, size_};
    return 1;
  }

  const uint8_t* tail = tl.seg_alloc->Translate(small_ptr_);

  dest[0] = string_view{prefix_, kPrefLen};
  dest[1] = string_view{reinterpret_cast<const char*>(tail), size_ - kPrefLen};
  return 2;
}
|
||||
|
||||
} // namespace dfly
|
60
core/small_string.h
Normal file
60
core/small_string.h
Normal file
|
@ -0,0 +1,60 @@
|
|||
// Copyright 2022, Roman Gershman. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
|
||||
#include "core/core_types.h"
|
||||
|
||||
namespace dfly {
|
||||
|
||||
// Blob strings of up to ~64KB. Small sizes are probably predominant
// for in-memory workloads, especially for keys.
// Please note that this class does not have automatic constructors and destructors, therefore
// it requires explicit management: call Reset() before the first Assign() and Free() when done.
class SmallString {
  static constexpr unsigned kPrefLen = 10;

 public:
  // Prepares the per-thread state used by the out-of-line members.
  static void InitThreadLocal();

  // Marks the object as empty without releasing anything.
  void Reset() {
    size_ = 0;
  }

  // Stores `s`; intended only for strings longer than kPrefLen.
  void Assign(std::string_view s);

  // Releases the out-of-line part, if any.
  void Free();

  bool Equal(std::string_view o) const;
  bool Equal(const SmallString& mps) const;

  // Total string length, including the inline prefix.
  uint16_t size() const {
    return size_;
  }

  uint64_t HashCode() const;

  // Approximate number of bytes allocated outside of this object.
  // I am lying here: we should really use mi_malloc_usable_size.
  // Returns 0 when the string fits into the prefix (nothing is allocated in that case,
  // which matches what Free() assumes); otherwise the out-of-line length, but at least 8.
  uint16_t MallocUsed() const {
    if (size_ <= kPrefLen)
      return 0;

    const unsigned tail_len = size_ - kPrefLen;
    return tail_len < 8 ? 8 : tail_len;
  }

  // Copies the whole string into `dest`.
  void Get(std::string* dest) const;

  // Returns 1 or 2 slices representing this small string.
  // Zero copy: the slices refer to this string's own storage, not to any external buffer.
  // With the current implementation it returns 2 slices for any string longer than kPrefLen.
  unsigned GetV(std::string_view dest[2]) const;

 private:
  // The string is broken down into 2 parts; this is the inline prefix.
  char prefix_[kPrefLen];

  uint32_t small_ptr_;  // 32GB capacity because we ignore 3 lsb bits (i.e. x8).
  uint16_t size_;       // uint16_t - total size (including prefix)

} __attribute__((packed));
|
||||
|
||||
} // namespace dfly
|
|
@ -25,6 +25,8 @@ using namespace util;
|
|||
#define ADD(x) (x) += o.x
|
||||
|
||||
DbStats& DbStats::operator+=(const DbStats& o) {
|
||||
static_assert(sizeof(DbStats) == 56);
|
||||
|
||||
ADD(key_count);
|
||||
ADD(expire_count);
|
||||
ADD(bucket_count);
|
||||
|
@ -32,6 +34,7 @@ DbStats& DbStats::operator+=(const DbStats& o) {
|
|||
|
||||
ADD(obj_memory_usage);
|
||||
ADD(table_mem_usage);
|
||||
ADD(small_string_bytes);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
@ -84,6 +87,7 @@ auto DbSlice::GetStats() const -> Stats {
|
|||
s.db.inline_keys += db->stats.inline_keys;
|
||||
s.db.table_mem_usage += (db->prime_table.mem_usage() + db->expire_table.mem_usage());
|
||||
}
|
||||
s.db.small_string_bytes = CompactObj::GetStats().small_string_bytes;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
@ -318,10 +322,10 @@ pair<MainIterator, bool> DbSlice::AddIfNotExist(DbIndex db_ind, string_view key,
|
|||
uint64_t expire_at_ms) {
|
||||
DCHECK(!obj.IsRef());
|
||||
|
||||
auto& db = db_arr_[db_ind];
|
||||
auto& db = *db_arr_[db_ind];
|
||||
CompactObj co_key{key};
|
||||
|
||||
auto [new_entry, inserted] = db->prime_table.Insert(std::move(co_key), std::move(obj));
|
||||
auto [new_entry, inserted] = db.prime_table.Insert(std::move(co_key), std::move(obj));
|
||||
|
||||
// in this case obj won't be moved and will be destroyed during unwinding.
|
||||
if (!inserted)
|
||||
|
@ -329,13 +333,13 @@ pair<MainIterator, bool> DbSlice::AddIfNotExist(DbIndex db_ind, string_view key,
|
|||
|
||||
new_entry.SetVersion(NextVersion());
|
||||
|
||||
db->stats.inline_keys += new_entry->first.IsInline();
|
||||
db->stats.obj_memory_usage += (new_entry->first.MallocUsed() + new_entry->second.MallocUsed());
|
||||
db.stats.inline_keys += new_entry->first.IsInline();
|
||||
db.stats.obj_memory_usage += (new_entry->first.MallocUsed() + new_entry->second.MallocUsed());
|
||||
|
||||
if (expire_at_ms) {
|
||||
new_entry->second.SetExpire(true);
|
||||
|
||||
CHECK(db->expire_table.Insert(new_entry->first.AsRef(), expire_at_ms).second);
|
||||
CHECK(db.expire_table.Insert(new_entry->first.AsRef(), expire_at_ms).second);
|
||||
}
|
||||
|
||||
return make_pair(new_entry, true);
|
||||
|
|
|
@ -38,6 +38,8 @@ struct DbStats {
|
|||
// Memory used by dictionaries.
|
||||
size_t table_mem_usage = 0;
|
||||
|
||||
size_t small_string_bytes = 0;
|
||||
|
||||
DbStats& operator+=(const DbStats& o);
|
||||
};
|
||||
|
||||
|
|
|
@ -395,6 +395,7 @@ tcp_port:)";
|
|||
absl::StrAppend(&info, "table_used_memory:", m.db.table_mem_usage, "\n");
|
||||
absl::StrAppend(&info, "num_entries:", m.db.key_count, "\n");
|
||||
absl::StrAppend(&info, "inline_keys:", m.db.inline_keys, "\n");
|
||||
absl::StrAppend(&info, "small_string_bytes:", m.db.small_string_bytes, "\n");
|
||||
}
|
||||
|
||||
if (should_enter("STATS")) {
|
||||
|
|
Loading…
Reference in a new issue