mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2024-12-14 11:58:02 +00:00
chore: add defrag for StringSet
This commit is contained in:
parent
f892d9b7fb
commit
1ade305731
4 changed files with 103 additions and 2 deletions
|
@ -278,6 +278,15 @@ pair<void*, bool> DefragSortedMap(detail::SortedMap* sm, float ratio) {
|
|||
return {sm, reallocated};
|
||||
}
|
||||
|
||||
pair<void*, bool> DefragStrSet(StringSet* ss, float ratio) {
|
||||
bool realloced = false;
|
||||
|
||||
for (auto it = ss->begin(); it != ss->end(); ++it)
|
||||
realloced |= it.ReallocIfNeeded(ratio);
|
||||
|
||||
return {ss, realloced};
|
||||
}
|
||||
|
||||
// Iterates over allocations of internal hash data structures and re-allocates
|
||||
// them if their pages are underutilized.
|
||||
// Returns pointer to new object ptr and whether any re-allocations happened.
|
||||
|
@ -306,8 +315,7 @@ pair<void*, bool> DefragSet(unsigned encoding, void* ptr, float ratio) {
|
|||
}
|
||||
|
||||
case kEncodingStrMap2: {
|
||||
// Still not implemented
|
||||
return {ptr, false};
|
||||
return DefragStrSet((StringSet*)ptr, ratio);
|
||||
}
|
||||
|
||||
default:
|
||||
|
|
|
@ -165,4 +165,44 @@ sds StringSet::MakeSetSds(string_view src, uint32_t ttl_sec) const {
|
|||
return sdsnewlen(src.data(), src.size());
|
||||
}
|
||||
|
||||
// Does not release obj. Callers must deallocate with sdsfree explicitly
|
||||
pair<sds, bool> StringSet::DuplicateEntryIfFragmented(void* obj, float ratio) {
|
||||
sds key = (sds)obj;
|
||||
|
||||
if (!zmalloc_page_is_underutilized(key, ratio))
|
||||
return {key, false};
|
||||
|
||||
size_t key_len = sdslen(key);
|
||||
bool has_ttl = MayHaveTtl(key);
|
||||
|
||||
if (has_ttl) {
|
||||
sds res = AllocSdsWithSpace(key_len, sizeof(uint32_t));
|
||||
std::memcpy(res, key, key_len + sizeof(uint32_t));
|
||||
return {res, true};
|
||||
}
|
||||
|
||||
return {sdsnewlen(key, key_len), true};
|
||||
}
|
||||
|
||||
bool StringSet::iterator::ReallocIfNeeded(float ratio) {
|
||||
auto* ptr = curr_entry_;
|
||||
if (ptr->IsLink()) {
|
||||
ptr = ptr->AsLink();
|
||||
}
|
||||
|
||||
DCHECK(!ptr->IsEmpty());
|
||||
DCHECK(ptr->IsObject());
|
||||
|
||||
auto* obj = ptr->GetObject();
|
||||
auto [new_obj, realloced] =
|
||||
static_cast<StringSet*>(owner_)->DuplicateEntryIfFragmented(obj, ratio);
|
||||
|
||||
if (realloced) {
|
||||
ptr->SetObject(new_obj);
|
||||
sdsfree((sds)obj);
|
||||
}
|
||||
|
||||
return realloced;
|
||||
}
|
||||
|
||||
} // namespace dfly
|
||||
|
|
|
@ -85,6 +85,10 @@ class StringSet : public DenseSet {
|
|||
using IteratorBase::ExpiryTime;
|
||||
using IteratorBase::HasExpiry;
|
||||
using IteratorBase::SetExpiryTime;
|
||||
|
||||
// Try reducing memory fragmentation of the value by re-allocating. Returns true if
|
||||
// re-allocation happened.
|
||||
bool ReallocIfNeeded(float ratio);
|
||||
};
|
||||
|
||||
iterator begin() {
|
||||
|
@ -114,6 +118,9 @@ class StringSet : public DenseSet {
|
|||
void ObjDelete(void* obj, bool has_ttl) const override;
|
||||
void* ObjectClone(const void* obj, bool has_ttl, bool add_ttl) const override;
|
||||
sds MakeSetSds(std::string_view src, uint32_t ttl_sec) const;
|
||||
|
||||
private:
|
||||
std::pair<sds, bool> DuplicateEntryIfFragmented(void* obj, float ratio);
|
||||
};
|
||||
|
||||
template <typename T> unsigned StringSet::AddMany(absl::Span<T> span, uint32_t ttl_sec) {
|
||||
|
|
|
@ -657,4 +657,50 @@ void BM_Grow(benchmark::State& state) {
|
|||
}
|
||||
BENCHMARK(BM_Grow);
|
||||
|
||||
unsigned total_wasted_memory = 0;
|
||||
|
||||
TEST_F(StringSetTest, ReallocIfNeeded) {
|
||||
auto build_str = [](size_t i) { return to_string(i) + string(131, 'a'); };
|
||||
|
||||
auto count_waste = [](const mi_heap_t* heap, const mi_heap_area_t* area, void* block,
|
||||
size_t block_size, void* arg) {
|
||||
size_t used = block_size * area->used;
|
||||
total_wasted_memory += area->committed - used;
|
||||
return true;
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < 10'000; i++)
|
||||
ss_->Add(build_str(i));
|
||||
|
||||
for (size_t i = 0; i < 10'000; i++) {
|
||||
if (i % 10 == 0)
|
||||
continue;
|
||||
ss_->Erase(build_str(i));
|
||||
}
|
||||
|
||||
mi_heap_collect(mi_heap_get_backing(), true);
|
||||
mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);
|
||||
size_t wasted_before = total_wasted_memory;
|
||||
|
||||
size_t underutilized = 0;
|
||||
for (auto it = ss_->begin(); it != ss_->end(); ++it) {
|
||||
underutilized += zmalloc_page_is_underutilized(*it, 0.9);
|
||||
it.ReallocIfNeeded(0.9);
|
||||
}
|
||||
// Check there are underutilized pages
|
||||
CHECK_GT(underutilized, 0u);
|
||||
|
||||
total_wasted_memory = 0;
|
||||
mi_heap_collect(mi_heap_get_backing(), true);
|
||||
mi_heap_visit_blocks(mi_heap_get_backing(), false, count_waste, nullptr);
|
||||
size_t wasted_after = total_wasted_memory;
|
||||
|
||||
// Check we waste significanlty less now
|
||||
EXPECT_GT(wasted_before, wasted_after * 2);
|
||||
|
||||
EXPECT_EQ(ss_->UpperBoundSize(), 1000);
|
||||
for (size_t i = 0; i < 1000; i++)
|
||||
EXPECT_EQ(*ss_->Find(build_str(i * 10)), build_str(i * 10));
|
||||
}
|
||||
|
||||
} // namespace dfly
|
||||
|
|
Loading…
Reference in a new issue