chore(search): Block list (#2307)

chore(search): Block list --------- Signed-off-by: Vladislav Oleshko <vlad@dragonflydb.io>
2024-12-14 11:58:02 +00:00 · 2023-12-24 17:42:03 +03:00 · 2023-12-24 17:42:03 +03:00 · d129674e17
commit d129674e17
parent 8bd43497f2
10 changed files with 480 additions and 38 deletions
--- a/src/core/search/CMakeLists.txt
+++ b/src/core/search/CMakeLists.txt
@ -3,11 +3,13 @@ gen_bison(parser)

 cur_gen_dir(gen_dir)

-add_library(query_parser base.cc ast_expr.cc query_driver.cc search.cc indices.cc sort_indices.cc vector_utils.cc
-            compressed_sorted_set.cc ${gen_dir}/parser.cc ${gen_dir}/lexer.cc)
+add_library(query_parser base.cc ast_expr.cc query_driver.cc search.cc indices.cc
+            sort_indices.cc vector_utils.cc compressed_sorted_set.cc block_list.cc
+            ${gen_dir}/parser.cc ${gen_dir}/lexer.cc)

 target_link_libraries(query_parser base absl::strings TRDP::reflex TRDP::uni-algo TRDP::hnswlib)

 cxx_test(compressed_sorted_set_test query_parser LABELS DFLY)
+cxx_test(block_list_test query_parser LABELS DFLY)
 cxx_test(search_parser_test query_parser LABELS DFLY)
 cxx_test(search_test query_parser LABELS DFLY)
--- a/src/core/search/block_list.cc
+++ b/src/core/search/block_list.cc
@ -0,0 +1,134 @@
+#include "core/search/block_list.h"
+
+namespace dfly::search {
+
+using namespace std;
+
+// Inserts t into the block that should contain it, creating the first block on demand.
+// Returns true if t was inserted and false if the block already contained it.
+template <typename C> bool BlockList<C>::Insert(DocId t) {
+  BlockIt target = FindBlock(t);
+
+  // FindBlock returns end() only if there are no blocks yet, so start the first one
+  if (target == blocks_.end()) {
+    C fresh{blocks_.get_allocator().resource()};
+    target = blocks_.insert(blocks_.end(), std::move(fresh));
+  }
+
+  const bool inserted = target->Insert(t);
+  if (inserted) {
+    ++size_;
+    TrySplit(target);  // the block has grown, split it if it became too large
+  }
+  return inserted;
+}
+
+// Removes t from the block that should contain it.
+// Returns true if an element was removed, false if the list is empty or t was not found.
+template <typename C> bool BlockList<C>::Remove(DocId t) {
+  BlockIt target = FindBlock(t);
+  if (target == blocks_.end())
+    return false;
+
+  if (!target->Remove(t))
+    return false;
+
+  --size_;
+  TryMerge(target);  // the block has shrunk, drop or merge it if needed
+  return true;
+}
+
+// Returns the block that should contain t: the last block whose first element is not
+// greater than t, or the first block if t is smaller than all stored elements.
+// Returns blocks_.end() only if there are no blocks at all.
+template <typename C> typename BlockList<C>::BlockIt BlockList<C>::FindBlock(DocId t) {
+  DCHECK(blocks_.empty() || blocks_.back().Size() > 0u);
+
+  // Fast path for tail inserts: t belongs to the last block
+  if (!blocks_.empty() && t >= *blocks_.back().begin())
+    return --blocks_.end();
+
+  // Find first block that can't contain t, i.e. whose first element is greater than t
+  auto it = std::upper_bound(blocks_.begin(), blocks_.end(), t,
+                             [](DocId value, const C& block) { return *block.begin() > value; });
+
+  // Move to previous if possible
+  if (it != blocks_.begin())
+    --it;
+
+  // Sanity checks for the block size bounds, the first block is exempt from the lower ones
+  DCHECK(it == blocks_.begin() || it->Size() > 0);
+  DCHECK(it == blocks_.begin() || it->Size() * 2 >= block_size_);
+  DCHECK(it == blocks_.end() || it->Size() <= 2 * block_size_);
+  return it;
+}
+
+// Rebalances after a removal: erases the block if it became empty, otherwise merges an
+// undersized block into its left neighbour. The first block is never merged.
+template <typename C> void BlockList<C>::TryMerge(BlockIt block) {
+  const size_t remaining = block->Size();
+  if (remaining == 0) {
+    blocks_.erase(block);
+    return;
+  }
+
+  const bool large_enough = remaining >= block_size_ / 2;
+  if (large_enough || block == blocks_.begin())
+    return;
+
+  // Merge strictly right with left to benefit from tail insert optimizations
+  BlockIt left = std::prev(block);
+  left->Merge(std::move(*block));
+  blocks_.erase(block);  // iterators in front of the erased position stay valid
+
+  TrySplit(left);  // to not overgrow it
+}
+
+// Splits the block in two once it holds at least twice the target block size.
+// The first part returned by Split() is inserted in front of the block,
+// the second part replaces the block itself.
+template <typename C> void BlockList<C>::TrySplit(BlockIt block) {
+  const bool oversized = block->Size() >= block_size_ * 2;
+  if (!oversized)
+    return;
+
+  auto halves = std::move(*block).Split();
+
+  *block = std::move(halves.second);
+  blocks_.insert(block, std::move(halves.first));
+}
+
+// Advances to the next element. When the current block is exhausted, continues with the
+// first element of the next block, or clears both inner iterators after the last block.
+template <typename C>
+typename BlockList<C>::BlockListIterator& BlockList<C>::BlockListIterator::operator++() {
+  ++*block_it;
+  if (!(block_it == block_end))
+    return *this;
+
+  // Current block is exhausted, move on to the next one
+  ++it;
+  if (it == it_end) {
+    block_it.reset();
+    block_end.reset();
+  } else {
+    block_it = it->begin();
+    block_end = it->end();
+  }
+  return *this;
+}
+
+// Explicit instantiations: the member functions of BlockList are defined in this file,
+// so the class is instantiated here for both supported container types.
+template class BlockList<CompressedSortedSet>;
+template class BlockList<SortedVector>;
+
+// Inserts t while keeping entries_ sorted. Returns false if t is already stored.
+bool SortedVector::Insert(DocId t) {
+  // Tail insert optimization: a value above the current maximum is simply appended
+  const bool append = !entries_.empty() && entries_.back() < t;
+  if (append) {
+    entries_.push_back(t);
+    return true;
+  }
+
+  auto pos = std::lower_bound(entries_.begin(), entries_.end(), t);
+  const bool present = pos != entries_.end() && *pos == t;
+  if (!present)
+    entries_.insert(pos, t);
+  return !present;
+}
+
+// Erases t if it is stored. Returns true if an element was erased, false otherwise.
+bool SortedVector::Remove(DocId t) {
+  auto pos = std::lower_bound(entries_.begin(), entries_.end(), t);
+  if (pos == entries_.end() || *pos != t)
+    return false;
+
+  entries_.erase(pos);
+  return true;
+}
+
+// Adds all values from other to this vector. Values that are already stored are skipped.
+void SortedVector::Merge(SortedVector&& other) {
+  // Super-linear in theory (every Insert does a binary search and may shift elements),
+  // but in practice used only to merge with larger values.
+  // Tail insert optimization makes it linear
+  entries_.reserve(entries_.size() + other.entries_.size());
+
+  // Iterate with the element type instead of int to avoid an implicit narrowing conversion
+  for (DocId t : other.entries_)
+    Insert(t);
+}
+
+// Splits the vector into two halves. The first element of the returned pair keeps the
+// lower size / 2 entries (it is this vector, moved out), the second one holds the rest.
+std::pair<SortedVector, SortedVector> SortedVector::Split() && {
+  const auto mid = entries_.begin() + entries_.size() / 2;
+
+  // Build the tail with the allocator of entries_ instead of a default constructed one,
+  // so both halves stay on the same memory resource and a later move assignment between
+  // them can take over the buffer instead of moving element by element
+  PMR_NS::vector<DocId> tail(mid, entries_.end(), entries_.get_allocator());
+  entries_.erase(mid, entries_.end());
+
+  return std::make_pair(std::move(*this), SortedVector{std::move(tail)});
+}
+
+}  // namespace dfly::search
--- a/src/core/search/block_list.h
+++ b/src/core/search/block_list.h
@ -0,0 +1,133 @@
+#pragma once
+
+#include <absl/types/span.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <optional>
+#include <vector>
+
+#include "core/search/base.h"
+#include "core/search/compressed_sorted_set.h"
+
+namespace dfly::search {
+// BlockList is a container wrapper for CompressedSortedSet / vector<DocId>
+// to divide the full sorted id range into separate blocks. This reduces modification
+// complexity from O(N) to O(logN + K), where K is the max block size.
+//
+// It tries to balance block sizes in the range [block_size / 2, block_size * 2]
+// by splitting or merging nodes when needed.
+template <typename Container /* underlying container */> class BlockList {
+  using BlockIt = typename PMR_NS::vector<Container>::iterator;
+  using ConstBlockIt = typename PMR_NS::vector<Container>::const_iterator;
+
+ public:
+  // Creates an empty list whose block vector allocates from mr.
+  // block_size is the target block size that splitting and merging are based on.
+  BlockList(PMR_NS::memory_resource* mr, size_t block_size = 1000)
+      : block_size_{block_size}, blocks_(mr) {
+  }
+
+  // Insert element, returns true if inserted, false if already present.
+  bool Insert(DocId t);
+
+  // Remove element, returns true if removed, false if not found.
+  bool Remove(DocId t);
+
+  // Total number of elements over all blocks
+  size_t Size() const {
+    return size_;
+  }
+
+  // Lower-case alias of Size()
+  size_t size() const {
+    return size_;
+  }
+
+  // Iterator over all elements, visiting the blocks one after another
+  struct BlockListIterator {
+    // To make it work with std container constructors
+    using iterator_category = std::forward_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using value_type = DocId;
+    using pointer = DocId*;
+    using reference = DocId&;
+
+    // Current element, returned by value
+    DocId operator*() const {
+      return **block_it;
+    }
+
+    // Advance to the next element, moving on to the next block if the current one is exhausted
+    BlockListIterator& operator++();
+
+    friend class BlockList;
+
+    // Equal if both point to the same block and the same position inside of it
+    bool operator==(const BlockListIterator& other) const {
+      return it == other.it && block_it == other.block_it;
+    }
+
+    bool operator!=(const BlockListIterator& other) const {
+      return !operator==(other);
+    }
+
+   private:
+    // Starts at the first element of block `begin`.
+    // If begin == end, the inner iterators stay empty, which is the end state
+    BlockListIterator(ConstBlockIt begin, ConstBlockIt end) : it(begin), it_end(end) {
+      if (it != it_end) {
+        block_it = it->begin();
+        block_end = it->end();
+      }
+    }
+
+    // Current block and end of the block range
+    ConstBlockIt it, it_end;
+    // Position inside the current block and its end, empty in the end state
+    std::optional<typename Container::iterator> block_it, block_end;
+  };
+
+  // Iterator to the first element of the first block, equal to end() if there are no blocks
+  BlockListIterator begin() const {
+    return BlockListIterator{blocks_.begin(), blocks_.end()};
+  }
+
+  // Past-the-end iterator
+  BlockListIterator end() const {
+    return BlockListIterator{blocks_.end(), blocks_.end()};
+  }
+
+ private:
+  // Find block that should contain t. Returns end() only if empty
+  BlockIt FindBlock(DocId t);
+
+  void TryMerge(BlockIt block);  // If needed, merge with previous block
+  void TrySplit(BlockIt block);  // If needed, split into two blocks
+
+ private:
+  const size_t block_size_ = 1000;    // target block size
+  size_t size_ = 0;                   // total number of elements over all blocks
+  PMR_NS::vector<Container> blocks_;  // blocks in iteration order
+};
+
+// Supports Insert and Remove operations for keeping a sorted vector internally.
+// Wrapper to use vectors with BlockList
+struct SortedVector {
+  // Creates an empty vector that allocates from mr
+  explicit SortedVector(PMR_NS::memory_resource* mr) : entries_(mr) {
+  }
+
+  bool Insert(DocId t);                              // True if inserted, false if present
+  bool Remove(DocId t);                              // True if removed, false if not found
+  void Merge(SortedVector&& other);                  // Add all values from other
+  std::pair<SortedVector, SortedVector> Split() &&;  // Split into two halves
+
+  // Number of stored entries. Const qualified, so it can be called on const vectors as well
+  size_t Size() const {
+    return entries_.size();
+  }
+
+  using iterator = typename PMR_NS::vector<DocId>::const_iterator;
+
+  iterator begin() const {
+    return entries_.cbegin();
+  }
+
+  iterator end() const {
+    return entries_.cend();
+  }
+
+ private:
+  // Takes over an existing vector of entries, used by Split() to build the second half
+  explicit SortedVector(PMR_NS::vector<DocId>&& v) : entries_{std::move(v)} {
+  }
+
+  PMR_NS::vector<DocId> entries_;  // kept in ascending order by Insert
+};
+
+}  // namespace dfly::search
--- a/src/core/search/block_list_test.cc
+++ b/src/core/search/block_list_test.cc
@ -0,0 +1,123 @@
+// Copyright 2023, DragonflyDB authors.  All rights reserved.
+// See LICENSE for licensing terms.
+//
+
+#include "core/search/block_list.h"
+
+#include <absl/container/btree_set.h>
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <set>
+
+#include "base/gtest.h"
+#include "base/logging.h"
+
+namespace dfly::search {
+
+using namespace std;
+
+// Typed fixture that builds a BlockList over the container type under test.
+template <typename C> class BlockListTest : public testing::Test {
+ public:
+  // Create list with small block size to test blocking mechanism more extensively
+  auto Make() {
+    constexpr size_t kSmallBlockSize = 10;
+    return BlockList<C>{PMR_NS::get_default_resource(), kSmallBlockSize};
+  }
+};
+
+// Run every typed test with both underlying containers
+using ContainerTypes = ::testing::Types<CompressedSortedSet, SortedVector>;
+TYPED_TEST_SUITE(BlockListTest, ContainerTypes);
+
+// Interleaves inserts into the lower and the upper half of the id range, checks that
+// iteration yields all ids in ascending order and removes them in the same interleaved order.
+TYPED_TEST(BlockListTest, LoopMidInsertErase) {
+  const size_t kNumElements = 50;
+  auto list = this->Make();
+
+  for (size_t i = 0; i < kNumElements / 2; i++) {
+    list.Insert(i);
+    list.Insert(i + kNumElements / 2);
+  }
+
+  // Collect as DocId (not int), so the comparisons below don't mix signed and unsigned values
+  vector<DocId> out(list.begin(), list.end());
+  ASSERT_EQ(list.size(), kNumElements);
+  ASSERT_EQ(out.size(), kNumElements);
+  for (size_t i = 0; i < kNumElements; i++)
+    ASSERT_EQ(out[i], DocId(i));
+
+  for (size_t i = 0; i < kNumElements / 2; i++) {
+    list.Remove(i);
+    list.Remove(i + kNumElements / 2);
+  }
+
+  // Both the iteration range and the tracked size have to be empty now
+  out.assign(list.begin(), list.end());
+  EXPECT_EQ(out.size(), 0u);
+  EXPECT_EQ(list.size(), 0u);
+}
+
+// Inserts ids in descending order, then removes them in ten rounds: round r removes all
+// ids with id % 10 == r. Before each round the remaining ids are verified.
+TYPED_TEST(BlockListTest, InsertReverseRemoveSteps) {
+  const size_t kNumElements = 1000;
+  const size_t kStep = 10;
+  auto list = this->Make();
+
+  // Fill in descending order: kNumElements - 1, ..., 1, 0
+  for (size_t id = kNumElements; id > 0; id--)
+    list.Insert(id - 1);
+
+  for (size_t round = 0; round < kStep; round++) {
+    // Reversed snapshot, so the smallest remaining id sits at the back
+    vector<DocId> remaining{list.begin(), list.end()};
+    reverse(remaining.begin(), remaining.end());
+
+    EXPECT_EQ(remaining.size(), kNumElements / kStep * (kStep - round));
+    for (size_t id = 0; id < kNumElements; id++) {
+      if (id % kStep < round)
+        continue;  // removed in one of the previous rounds
+
+      EXPECT_EQ(remaining.back(), DocId(id));
+      remaining.pop_back();
+    }
+
+    // Remove all ids whose remainder matches the current round
+    for (size_t id = round; id < kNumElements; id += kStep)
+      list.Remove(id);
+  }
+
+  EXPECT_EQ(list.size(), 0u);
+}
+
+// Applies random inserts and removals to the list and to a std::set used as reference,
+// the two have to contain the same sequence after every single step.
+TYPED_TEST(BlockListTest, RandomNumbers) {
+  const size_t kNumIterations = 1'000;
+  auto list = this->Make();
+  std::set<DocId> reference;
+
+  for (size_t step = 0; step < kNumIterations; step++) {
+    // With more than 100 elements stored, remove a random one if rand() % 5 == 0
+    const bool remove = reference.size() > 100 && rand() % 5 == 0;
+    if (remove) {
+      auto victim = reference.begin();
+      std::advance(victim, rand() % reference.size());
+      list.Remove(*victim);
+      reference.erase(victim);
+    } else {
+      const DocId id = rand() % 1'000'000;
+      list.Insert(id);
+      reference.insert(id);
+    }
+
+    ASSERT_TRUE(std::equal(list.begin(), list.end(), reference.begin(), reference.end()));
+  }
+}
+
+// Benchmarks Remove on a list that was filled with the ids [0, size), where size is the
+// benchmark argument. The loop removes ids starting at size / 10 and wraps around after
+// size * 9 / 10 steps.
+// NOTE(review): ids are never inserted again, so after the first full cycle the Remove
+// calls target ids that were already removed - confirm that this is intended.
+static void BM_Erase90PctTail(benchmark::State& state) {
+  BlockList<CompressedSortedSet> bl{PMR_NS::get_default_resource()};
+
+  unsigned size = state.range(0);
+  for (size_t i = 0; i < size; i++)
+    bl.Insert(i);
+
+  size_t base = size / 10;  // first id of the removed range
+  size_t i = 0;             // offset from base, cycles through [0, size * 9 / 10)
+  while (state.KeepRunning()) {
+    benchmark::DoNotOptimize(bl.Remove(base + i));
+    i = (i + 1) % (size * 9 / 10);
+  }
+}
+
+// Run with 100'000 elements
+BENCHMARK(BM_Erase90PctTail)->Args({100'000});
+
+}  // namespace dfly::search
--- a/src/core/search/compressed_sorted_set.cc
+++ b/src/core/search/compressed_sorted_set.cc
@ -97,27 +97,27 @@ CompressedSortedSet::EntryLocation CompressedSortedSet::LowerBound(IntType value
 // needs to be inserted. Then it computes the differences dif1 = V - A and diff2 = B - V that need
 // to be stored to encode the triple A V B. Those are stored where diff0 = B - A was previously
 // stored, possibly extending the vector
-void CompressedSortedSet::Insert(IntType value) {
+bool CompressedSortedSet::Insert(IntType value) {
  if (tail_value_ && *tail_value_ == value)
-    return;
+    return false;

  if (tail_value_ && value > *tail_value_) {
    PushBackDiff(value - *tail_value_);
    tail_value_ = value;
-    return;
+    return true;
  }

  auto bound = LowerBound(value);

  // At least one element was read and it's equal to value: return to avoid duplicate
  if (bound.value == value && !bound.diff_span.empty())
-    return;
+    return false;

  // Value is bigger than any other (or list is empty): append required diff at the end
  if (value > bound.value || bound.diff_span.empty()) {
    PushBackDiff(value - bound.value);
    tail_value_ = value;
-    return;
+    return true;
  }

  size_++;
@ -141,17 +141,19 @@ void CompressedSortedSet::Insert(IntType value) {
  // Now overwrite diff0 and 0s with the two new differences
  copy(diff1_span.begin(), diff1_span.end(), diffs_.begin() + diff_offset);
  copy(diff2_span.begin(), diff2_span.end(), diffs_.begin() + diff_offset + diff1_span.size());
+
+  return true;
 }

 // Remove has linear complexity. It tries to find the element V and its neighbors A and B,
 // which are encoded as diff1 = V - A and diff2 = B - V. Adjacently stored diff1 and diff2
 // need to be replaced with diff3 = diff1 + diff2s
-void CompressedSortedSet::Remove(IntType value) {
+bool CompressedSortedSet::Remove(IntType value) {
  auto bound = LowerBound(value);

  // Nothing was read or the element was not found
  if (bound.diff_span.empty() || bound.value != value)
-    return;
+    return false;

  // We're removing below unconditionally
  size_--;
@ -166,7 +168,7 @@ void CompressedSortedSet::Remove(IntType value) {
    tail_value_ = bound.prev_value;
    if (diffs_.empty())
      tail_value_ = nullopt;
-    return;
+    return true;
  }

  // Now the list certainly contains a succeeding element B > V and possibly A < V (or 0)
@ -185,6 +187,8 @@ void CompressedSortedSet::Remove(IntType value) {

  // Overwrite diff1/diff2 with new diff3
  copy(diff3_buf.begin(), diff3_buf.end(), diffs_.begin() + diff_offset);
+
+  return true;
 }

 size_t CompressedSortedSet::Size() const {
@ -195,6 +199,35 @@ size_t CompressedSortedSet::ByteSize() const {
  return diffs_.size();
 }

+// Adds all values from other to this set by inserting them one by one.
+void CompressedSortedSet::Merge(CompressedSortedSet&& other) {
+  // Quadratic complexity in theory, but in practice used only to merge with larger values.
+  // Tail insert optimization makes it linear
+
+  // Iterate with IntType, the parameter type of Insert, instead of converting through int
+  for (IntType v : other)
+    Insert(v);
+}
+
+// Splits the set into two parts: the first size_ / 2 values stay in this set, which is
+// returned as the first element of the pair, the remaining values are inserted into a
+// new set that is returned as the second element. Expects more than 5 stored values
+// (checked with DCHECK only).
+std::pair<CompressedSortedSet, CompressedSortedSet> CompressedSortedSet::Split() && {
+  DCHECK_GT(Size(), 5u);
+
+  // The second set allocates from the same memory resource as diffs_
+  CompressedSortedSet second(diffs_.get_allocator().resource());
+
+  // Move iterator to middle position and remember how many bytes of diffs_ precede it
+  // NOTE(review): presumably last_read_ is the encoded span of the value the iterator
+  // currently points to - confirm against the iterator implementation
+  auto it = begin();
+  std::advance(it, size_ / 2);
+  size_t keep_bytes = it.last_read_.data() - diffs_.data();
+
+  // Copy second half into second set
+  for (; it != end(); ++it)
+    second.Insert(*it);
+
+  // Erase diffs tail, has to happen after the copy above has read it.
+  // The cached tail value is cleared and the size is reduced by the number of moved values
+  diffs_.resize(keep_bytes);
+  tail_value_ = std::nullopt;
+  size_ -= second.Size();
+
+  return std::make_pair(std::move(*this), std::move(second));
+}
+
 // The leftmost three bits of the first byte store the number of additional bytes. All following
 // bits store the number itself.
 absl::Span<uint8_t> CompressedSortedSet::WriteVarLen(IntType value, absl::Span<uint8_t> buf) {
--- a/src/core/search/compressed_sorted_set.h
+++ b/src/core/search/compressed_sorted_set.h
@ -7,6 +7,7 @@
 #include <optional>
 #include <vector>

+#include "base/logging.h"
 #include "base/pmr/memory_resource.h"
 #include "core/search/base.h"

@ -48,7 +49,7 @@ class CompressedSortedSet {
    absl::Span<const uint8_t> diffs_{};
  };

-  friend struct Iterator;
+  using iterator = ConstIterator;

 public:
  explicit CompressedSortedSet(PMR_NS::memory_resource* mr);
@ -56,16 +57,17 @@ class CompressedSortedSet {
  ConstIterator begin() const;
  ConstIterator end() const;

-  void Insert(IntType value);  // Insert arbitrary element, needs to scan whole list
-  void Remove(IntType value);  // Remove arbitrary element, needs to scan whole list
+  bool Insert(IntType value);  // Insert arbitrary element, needs to scan whole list
+  bool Remove(IntType value);  // Remove arbitrary element, needs to scan whole list

  size_t Size() const;
  size_t ByteSize() const;

-  // To use transparently in templates together with stl containers
-  size_t size() const {
-    return Size();
-  }
+  // Add all values from other
+  void Merge(CompressedSortedSet&& other);
+
+  // Split into two equally sized halves
+  std::pair<CompressedSortedSet, CompressedSortedSet> Split() &&;

 private:
  struct EntryLocation {
@ -90,6 +92,7 @@ class CompressedSortedSet {

 private:
  uint32_t size_{0};
+  IntType head_value_{0};
  std::optional<IntType> tail_value_{};
  std::vector<uint8_t, PMR_NS::polymorphic_allocator<uint8_t>> diffs_;
 };
--- a/src/core/search/compressed_sorted_set_test.cc
+++ b/src/core/search/compressed_sorted_set_test.cc
@ -4,10 +4,13 @@

 #include "core/search/compressed_sorted_set.h"

+#include <absl/container/btree_set.h>
+
 #include <algorithm>

 #include "base/gtest.h"
 #include "base/logging.h"
+#include "core/bptree_set.h"

 namespace dfly::search {

--- a/src/core/search/indices.cc
+++ b/src/core/search/indices.cc
@ -88,10 +88,12 @@ vector<DocId> NumericIndex::Range(double l, double r) const {
  return out;
 }

-BaseStringIndex::BaseStringIndex(PMR_NS::memory_resource* mr) : entries_{mr} {
+template <typename C>
+BaseStringIndex<C>::BaseStringIndex(PMR_NS::memory_resource* mr) : entries_{mr} {
 }

-const CompressedSortedSet* BaseStringIndex::Matching(string_view str) const {
+template <typename C>
+const typename BaseStringIndex<C>::Container* BaseStringIndex<C>::Matching(string_view str) const {
  str = absl::StripAsciiWhitespace(str);

  string word;
@ -104,12 +106,14 @@ const CompressedSortedSet* BaseStringIndex::Matching(string_view str) const {
  return (it != entries_.end()) ? &it->second : nullptr;
 }

-CompressedSortedSet* BaseStringIndex::GetOrCreate(string_view word) {
+template <typename C>
+typename BaseStringIndex<C>::Container* BaseStringIndex<C>::GetOrCreate(string_view word) {
  auto* mr = entries_.get_allocator().resource();
-  return &entries_.try_emplace(PMR_NS::string{word, mr}, mr).first->second;
+  return &entries_.try_emplace(PMR_NS::string{word, mr}, mr, 1000 /* block size */).first->second;
 }

-void BaseStringIndex::Add(DocId id, DocumentAccessor* doc, string_view field) {
+template <typename C>
+void BaseStringIndex<C>::Add(DocId id, DocumentAccessor* doc, string_view field) {
  absl::flat_hash_set<std::string> tokens;
  for (string_view str : doc->GetStrings(field))
    tokens.merge(Tokenize(str));
@ -118,7 +122,8 @@ void BaseStringIndex::Add(DocId id, DocumentAccessor* doc, string_view field) {
    GetOrCreate(token)->Insert(id);
 }

-void BaseStringIndex::Remove(DocId id, DocumentAccessor* doc, string_view field) {
+template <typename C>
+void BaseStringIndex<C>::Remove(DocId id, DocumentAccessor* doc, string_view field) {
  absl::flat_hash_set<std::string> tokens;
  for (string_view str : doc->GetStrings(field))
    tokens.merge(Tokenize(str));
@ -134,6 +139,9 @@ void BaseStringIndex::Remove(DocId id, DocumentAccessor* doc, string_view field)
  }
 }

+template struct BaseStringIndex<CompressedSortedSet>;
+template struct BaseStringIndex<SortedVector>;
+
 absl::flat_hash_set<std::string> TextIndex::Tokenize(std::string_view value) const {
  return TokenizeWords(value);
 }
--- a/src/core/search/indices.h
+++ b/src/core/search/indices.h
@ -13,6 +13,7 @@

 #include "base/pmr/memory_resource.h"
 #include "core/search/base.h"
+#include "core/search/block_list.h"
 #include "core/search/compressed_sorted_set.h"

 // TODO: move core field definitions out of big header
@ -36,7 +37,9 @@ struct NumericIndex : public BaseIndex {
 };

 // Base index for string based indices.
-struct BaseStringIndex : public BaseIndex {
+template <typename C> struct BaseStringIndex : public BaseIndex {
+  using Container = BlockList<C>;
+
  BaseStringIndex(PMR_NS::memory_resource* mr);

  void Add(DocId id, DocumentAccessor* doc, std::string_view field) override;
@ -46,10 +49,10 @@ struct BaseStringIndex : public BaseIndex {
  virtual absl::flat_hash_set<std::string> Tokenize(std::string_view value) const = 0;

  // Pointer is valid as long as index is not mutated. Nullptr if not found
-  const CompressedSortedSet* Matching(std::string_view str) const;
+  const Container* Matching(std::string_view str) const;

 protected:
-  CompressedSortedSet* GetOrCreate(std::string_view word);
+  Container* GetOrCreate(std::string_view word);

  struct PmrEqual {
    using is_transparent = void;
@ -71,14 +74,14 @@ struct BaseStringIndex : public BaseIndex {
    }
  };

-  absl::flat_hash_map<PMR_NS::string, CompressedSortedSet, PmrHash, PmrEqual,
-                      PMR_NS::polymorphic_allocator<std::pair<PMR_NS::string, CompressedSortedSet>>>
+  absl::flat_hash_map<PMR_NS::string, Container, PmrHash, PmrEqual,
+                      PMR_NS::polymorphic_allocator<std::pair<PMR_NS::string, Container>>>
      entries_;
 };

 // Index for text fields.
 // Hashmap based lookup per word.
-struct TextIndex : public BaseStringIndex {
+struct TextIndex : public BaseStringIndex<CompressedSortedSet> {
  TextIndex(PMR_NS::memory_resource* mr) : BaseStringIndex(mr) {
  }

@ -87,7 +90,7 @@ struct TextIndex : public BaseStringIndex {

 // Index for text fields.
 // Hashmap based lookup per word.
-struct TagIndex : public BaseStringIndex {
+struct TagIndex : public BaseStringIndex<SortedVector> {
  TagIndex(PMR_NS::memory_resource* mr) : BaseStringIndex(mr) {
  }

--- a/src/core/search/search.cc
+++ b/src/core/search/search.cc
@ -45,20 +45,18 @@ AstExpr ParseQuery(std::string_view query, const QueryParams* params) {
 // Represents an either owned or non-owned result set that can be accessed transparently.
 struct IndexResult {
  using DocVec = vector<DocId>;
-  using BorrowedView = variant<const DocVec*, const CompressedSortedSet*>;
+  using BorrowedView =
+      variant<const DocVec*, const BlockList<CompressedSortedSet>*, const BlockList<SortedVector>*>;

  IndexResult() : value_{DocVec{}} {
  }

-  IndexResult(const CompressedSortedSet* css) : value_{css} {
-    if (css == nullptr)
-      value_ = DocVec{};
-  }
-
  IndexResult(DocVec&& dv) : value_{std::move(dv)} {
  }

-  IndexResult(const DocVec* dv) : value_{dv} {
+  template <typename C> IndexResult(const C* container = nullptr) : value_{container} {
+    if (container == nullptr)
+      value_ = DocVec{};
  }

  size_t Size() const {
@ -108,7 +106,9 @@ struct IndexResult {
  }

 private:
-  variant<DocVec /*owned*/, const CompressedSortedSet*, const DocVec*> value_;
+  variant<DocVec /*owned*/, const DocVec*, const BlockList<CompressedSortedSet>*,
+          const BlockList<SortedVector>*>
+      value_;
 };

 struct ProfileBuilder {