1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-14 11:58:02 +00:00

chore: add defrag for StringSet

This commit is contained in:
kostas 2024-12-13 12:13:30 +02:00
parent f892d9b7fb
commit 1ade305731
No known key found for this signature in database
GPG key ID: 1860AC7B1177CACB
4 changed files with 103 additions and 2 deletions

View file

@ -278,6 +278,15 @@ pair<void*, bool> DefragSortedMap(detail::SortedMap* sm, float ratio) {
return {sm, reallocated};
}
// Visits every entry of the string set and lets the iterator re-allocate it
// when ReallocIfNeeded decides to for the given ratio.
// Returns the set pointer unchanged, plus whether any entry was re-allocated.
pair<void*, bool> DefragStrSet(StringSet* ss, float ratio) {
  bool any_moved = false;
  auto cursor = ss->begin();
  while (cursor != ss->end()) {
    // The call is made for every entry; it is never short-circuited.
    if (cursor.ReallocIfNeeded(ratio))
      any_moved = true;
    ++cursor;
  }
  return {ss, any_moved};
}
// Iterates over allocations of internal hash data structures and re-allocates
// them if their pages are underutilized.
// Returns pointer to new object ptr and whether any re-allocations happened.
@ -306,8 +315,7 @@ pair<void*, bool> DefragSet(unsigned encoding, void* ptr, float ratio) {
}
case kEncodingStrMap2: {
// Still not implemented
return {ptr, false};
return DefragStrSet((StringSet*)ptr, ratio);
}
default:

View file

@ -165,4 +165,44 @@ sds StringSet::MakeSetSds(string_view src, uint32_t ttl_sec) const {
return sdsnewlen(src.data(), src.size());
}
// Returns a fresh copy of the sds entry `obj` when zmalloc_page_is_underutilized
// reports its page as underutilized for `ratio`; otherwise returns `obj` itself.
// The boolean is true iff a new copy was allocated.
// Does not release obj. Callers must deallocate with sdsfree explicitly
pair<sds, bool> StringSet::DuplicateEntryIfFragmented(void* obj, float ratio) {
  sds key = static_cast<sds>(obj);

  if (!zmalloc_page_is_underutilized(key, ratio))
    return {key, false};

  const size_t key_len = sdslen(key);

  if (MayHaveTtl(key)) {
    sds res = AllocSdsWithSpace(key_len, sizeof(uint32_t));
    // An sds buffer carries a NUL terminator after its key_len payload bytes.
    // NOTE(review): assumes the 4-byte TTL is stored right after that terminator
    // (at key + key_len + 1) — confirm against the TTL load/store helpers. Under
    // that layout, copying only key_len + sizeof(uint32_t) bytes would drop the
    // last TTL byte, so the terminator is accounted for explicitly here.
    std::memcpy(res, key, key_len + 1 + sizeof(uint32_t));
    return {res, true};
  }

  return {sdsnewlen(key, key_len), true};
}
// Re-allocates the entry this iterator currently points at when the owning
// StringSet's DuplicateEntryIfFragmented produces a new copy for `ratio`.
// On re-allocation the slot is pointed at the copy and the old sds is freed.
// Returns true iff the entry was re-allocated.
bool StringSet::iterator::ReallocIfNeeded(float ratio) {
  auto* slot = curr_entry_;
  // If the current entry is a link, operate on the node it links to.
  if (slot->IsLink())
    slot = slot->AsLink();

  DCHECK(!slot->IsEmpty());
  DCHECK(slot->IsObject());

  auto* old_obj = slot->GetObject();
  auto* set_owner = static_cast<StringSet*>(owner_);
  const auto dup = set_owner->DuplicateEntryIfFragmented(old_obj, ratio);

  if (!dup.second)
    return false;

  // DuplicateEntryIfFragmented does not release the source, so free it here
  // after the slot has been switched to the new copy.
  slot->SetObject(dup.first);
  sdsfree((sds)old_obj);
  return true;
}
} // namespace dfly

View file

@ -85,6 +85,10 @@ class StringSet : public DenseSet {
using IteratorBase::ExpiryTime;
using IteratorBase::HasExpiry;
using IteratorBase::SetExpiryTime;
// Try reducing memory fragmentation of the entry the iterator points at by re-allocating it.
// Returns true if re-allocation happened.
bool ReallocIfNeeded(float ratio);
};
iterator begin() {
@ -114,6 +118,9 @@ class StringSet : public DenseSet {
void ObjDelete(void* obj, bool has_ttl) const override;
void* ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const override;
sds MakeSetSds(std::string_view src, uint32_t ttl_sec) const;
private:
std::pair<sds, bool> DuplicateEntryIfFragmented(void* obj, float ratio);
};
template <typename T> unsigned StringSet::AddMany(absl::Span<T> span, uint32_t ttl_sec) {

View file

@ -657,4 +657,50 @@ void BM_Grow(benchmark::State& state) {
}
BENCHMARK(BM_Grow);
// Fills the set with 10'000 strings, erases 90% of them, and checks that
// calling ReallocIfNeeded on the remaining entries reduces the wasted
// (committed but unused) heap memory by more than half, while leaving the
// surviving entries intact.
TEST_F(StringSetTest, ReallocIfNeeded) {
  auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };

  // Visitor for mi_heap_visit_blocks: adds each area's committed-but-unused bytes
  // to the size_t accumulator passed through `arg`. Using `arg` (instead of a
  // mutable global) keeps every measurement independent and avoids narrowing the
  // byte count into `unsigned`.
  auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,
                        size_t block_size, void* arg) {
    size_t used = block_size * area->used;
    *static_cast<size_t*>(arg) += area->committed - used;
    return true;
  };

  // Collects the backing heap, then sums the waste over all of its areas.
  auto measure_waste = [&count_waste]() {
    size_t wasted = 0;
    mi_heap_collect(mi_heap_get_backing(), true);
    mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, &wasted);
    return wasted;
  };

  for (size_t i = 0; i < 10'000; i++)
    ss_->Add(build_str(i));

  // Keep only every 10th entry.
  for (size_t i = 0; i < 10'000; i++) {
    if (i % 10 == 0)
      continue;
    ss_->Erase(build_str(i));
  }

  const size_t wasted_before = measure_waste();

  size_t underutilized = 0;
  for (auto it = ss_->begin(); it != ss_->end(); ++it) {
    underutilized += zmalloc_page_is_underutilized(*it, 0.9);
    it.ReallocIfNeeded(0.9);
  }
  // Check there are underutilized pages. ASSERT fails only this test instead of
  // aborting the whole test binary.
  ASSERT_GT(underutilized, 0u);

  const size_t wasted_after = measure_waste();

  // Check we waste significantly less now
  EXPECT_GT(wasted_before, wasted_after * 2);

  EXPECT_EQ(ss_->UpperBoundSize(), 1000u);
  for (size_t i = 0; i < 1000; i++)
    EXPECT_EQ(*ss_->Find(build_str(i * 10)), build_str(i * 10));
}
} // namespace dfly