1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-15 17:51:06 +00:00

fix(search): Small improvements (#2248)

This commit is contained in:
Vladislav 2023-12-03 20:04:47 +03:00 committed by GitHub
parent 26512fdba4
commit f17bfaf0ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 61 additions and 26 deletions

View file

@ -149,8 +149,11 @@ std::pair<size_t /*dim*/, VectorSimilarity> BaseVectorIndex::Info() const {
return {dim_, sim_};
}
FlatVectorIndex::FlatVectorIndex(size_t dim, VectorSimilarity sim, PMR_NS::memory_resource* mr)
: BaseVectorIndex{dim, sim}, entries_{mr} {
// Constructs a flat vector index from the schema's vector parameters.
// The dimension and similarity type are forwarded to BaseVectorIndex, and
// `entries_` is constructed with the supplied memory resource `mr`.
FlatVectorIndex::FlatVectorIndex(const SchemaField::VectorParams& params,
PMR_NS::memory_resource* mr)
: BaseVectorIndex{params.dim, params.sim}, entries_{mr} {
// This index type must only be built for non-HNSW parameters (debug-only check).
DCHECK(!params.use_hnsw);
// Reserve room for `capacity` vectors of `dim` elements each up front.
// NOTE(review): presumably entries_ stores one float per dimension per
// document, back to back - confirm against the member declaration.
entries_.reserve(params.capacity * params.dim);
}
void FlatVectorIndex::Add(DocId id, DocumentAccessor* doc, string_view field) {
@ -174,8 +177,10 @@ const float* FlatVectorIndex::Get(DocId doc) const {
}
struct HnswlibAdapter {
HnswlibAdapter(size_t dim, VectorSimilarity sim, size_t cap)
: space_{MakeSpace(dim, sim)}, world_{GetSpacePtr(), cap} {
// Builds the adapter from the schema's vector parameters: `space_` is created
// by MakeSpace from the dimension and similarity type, and `world_` is
// constructed with that space, the initial capacity and the `hnsw_m` value.
// NOTE(review): the literals 200, 100 and true are passed positionally after
// hnsw_m. In upstream hnswlib's HierarchicalNSW constructor those positions
// are ef_construction, random_seed and allow_replace_deleted - confirm against
// the hnswlib version vendored by this project.
HnswlibAdapter(const SchemaField::VectorParams& params)
: space_{MakeSpace(params.dim, params.sim)}, world_{GetSpacePtr(), params.capacity,
params.hnsw_m, 200,
100, true} {
}
void Add(float* data, DocId id) {
@ -235,9 +240,9 @@ struct HnswlibAdapter {
hnswlib::HierarchicalNSW<float> world_;
};
HnswVectorIndex::HnswVectorIndex(size_t dim, VectorSimilarity sim, size_t capacity,
PMR_NS::memory_resource*)
: BaseVectorIndex{dim, sim}, adapter_{make_unique<HnswlibAdapter>(dim, sim, capacity)} {
// Constructs an HNSW-backed vector index from the schema's vector parameters.
// The dimension and similarity type are forwarded to BaseVectorIndex, and the
// whole parameter struct is handed to a newly allocated HnswlibAdapter.
// The memory resource argument is unnamed and not used by this constructor
// (see the TODO in the body).
HnswVectorIndex::HnswVectorIndex(const SchemaField::VectorParams& params, PMR_NS::memory_resource*)
: BaseVectorIndex{params.dim, params.sim}, adapter_{make_unique<HnswlibAdapter>(params)} {
// This index type must only be built for HNSW parameters (debug-only check).
DCHECK(params.use_hnsw);
// TODO: Patch hnsw to use MR
}

View file

@ -15,6 +15,9 @@
#include "core/search/base.h"
#include "core/search/compressed_sorted_set.h"
// TODO: move core field definitions out of big header
#include "core/search/search.h"
namespace dfly::search {
// Index for integer fields.
@ -104,7 +107,7 @@ struct BaseVectorIndex : public BaseIndex {
// Index for vector fields.
// Only supports lookup by id.
struct FlatVectorIndex : public BaseVectorIndex {
FlatVectorIndex(size_t dim, VectorSimilarity sim, PMR_NS::memory_resource* mr);
FlatVectorIndex(const SchemaField::VectorParams& params, PMR_NS::memory_resource* mr);
void Add(DocId id, DocumentAccessor* doc, std::string_view field) override;
void Remove(DocId id, DocumentAccessor* doc, std::string_view field) override;
@ -118,7 +121,7 @@ struct FlatVectorIndex : public BaseVectorIndex {
struct HnswlibAdapter;
struct HnswVectorIndex : public BaseVectorIndex {
HnswVectorIndex(size_t dim, VectorSimilarity sim, size_t capacity, PMR_NS::memory_resource* mr);
HnswVectorIndex(const SchemaField::VectorParams& params, PMR_NS::memory_resource* mr);
~HnswVectorIndex();
void Add(DocId id, DocumentAccessor* doc, std::string_view field) override;

View file

@ -471,10 +471,9 @@ void FieldIndices::CreateIndices(PMR_NS::memory_resource* mr) {
const auto& vparams = std::get<SchemaField::VectorParams>(field_info.special_params);
if (vparams.use_hnsw)
vector_index =
make_unique<HnswVectorIndex>(vparams.dim, vparams.sim, vparams.capacity, mr);
vector_index = make_unique<HnswVectorIndex>(vparams, mr);
else
vector_index = make_unique<FlatVectorIndex>(vparams.dim, vparams.sim, mr);
vector_index = make_unique<FlatVectorIndex>(vparams, mr);
indices_[field_ident] = std::move(vector_index);
break;

View file

@ -31,7 +31,9 @@ struct SchemaField {
size_t dim = 0u; // dimension of knn vectors
VectorSimilarity sim = VectorSimilarity::L2; // similarity type
size_t capacity = 1000; // initial capacity for hnsw world
size_t capacity = 1000; // initial capacity
size_t hnsw_m = 16;
};
using ParamsVariant = std::variant<std::monostate, VectorParams>;

View file

@ -46,36 +46,39 @@ bool IsValidJsonPath(string_view path) {
}
// Parses the parameters of a vector field from the command arguments.
// Expected layout: an index type keyword (HNSW or FLAT), then an argument
// count, then that many tokens read as option/value pairs. Recognized options
// are DIM, DISTANCE_METRIC (L2 or COSINE), INITIAL_CAP and M.
// Returns the collected VectorParams; options that are not supplied keep the
// value the struct (or the local) was initialized with.
// NOTE(review): this span comes from a commit diff whose +/- markers were
// stripped. Statements that use the locals dim/sim/capacity/use_hnsw appear
// next to statements that fill `params` directly; presumably the former are
// the removed version - confirm against the repository.
search::SchemaField::VectorParams ParseVectorParams(CmdArgParser* parser) {
size_t dim = 0;
auto sim = search::VectorSimilarity::L2;
size_t capacity = 1000;
search::SchemaField::VectorParams params{};
// The index type keyword selects between the HNSW and flat implementations.
bool use_hnsw = parser->ToUpper().Switch("HNSW", true, "FLAT", false);
params.use_hnsw = parser->ToUpper().Switch("HNSW", true, "FLAT", false);
// Number of tokens that follow; consumed below two at a time.
size_t num_args = parser->Next<size_t>();
// One iteration per option/value pair, hence the `i * 2 < num_args` bound.
for (size_t i = 0; i * 2 < num_args; i++) {
// NOTE(review): presumably upper-cases the upcoming token so option names
// match case-insensitively - confirm against CmdArgParser.
parser->ToUpper();
if (parser->Check("DIM").ExpectTail(1)) {
dim = parser->Next<size_t>();
params.dim = parser->Next<size_t>();
continue;
}
if (parser->Check("DISTANCE_METRIC").ExpectTail(1)) {
sim = parser->Switch("L2", search::VectorSimilarity::L2, "COSINE",
search::VectorSimilarity::COSINE);
params.sim = parser->Switch("L2", search::VectorSimilarity::L2, "COSINE",
search::VectorSimilarity::COSINE);
continue;
}
if (parser->Check("INITIAL_CAP").ExpectTail(1)) {
capacity = parser->Next<size_t>();
params.capacity = parser->Next<size_t>();
continue;
}
if (parser->Check("M").ExpectTail(1)) {
params.hnsw_m = parser->Next<size_t>();
continue;
}
// None of the known options matched.
// NOTE(review): presumably this skips the unknown option name together with
// its value - confirm against CmdArgParser::Skip.
parser->Skip(2);
}
return {use_hnsw, dim, sim, capacity};
return params;
}
optional<search::Schema> ParseSchemaOrReply(DocIndex::DataType type, CmdArgParser parser,
@ -343,11 +346,32 @@ void SearchFamily::FtCreate(CmdArgList args, ConnectionContext* cntx) {
if (auto err = parser.Error(); err)
return (*cntx)->SendError(err->MakeReply());
cntx->transaction->Schedule();
// Check if index already exists
atomic_uint exists_cnt = 0;
cntx->transaction->Execute(
[idx_name, &exists_cnt](auto* tx, auto* es) {
if (es->search_indices()->GetIndex(idx_name) != nullptr)
exists_cnt.fetch_add(1, std::memory_order_relaxed);
return OpStatus::OK;
},
false);
DCHECK(exists_cnt == 0u || exists_cnt == shard_set->size());
if (exists_cnt.load(memory_order_relaxed) > 0) {
cntx->transaction->Conclude();
return (*cntx)->SendError("Index already exists");
}
auto idx_ptr = make_shared<DocIndex>(move(index));
cntx->transaction->ScheduleSingleHop([idx_name, idx_ptr](auto* tx, auto* es) {
es->search_indices()->InitIndex(tx->GetOpArgs(es), idx_name, idx_ptr);
return OpStatus::OK;
});
cntx->transaction->Execute(
[idx_name, idx_ptr](auto* tx, auto* es) {
es->search_indices()->InitIndex(tx->GetOpArgs(es), idx_name, idx_ptr);
return OpStatus::OK;
},
true);
(*cntx)->SendOk();
}

View file

@ -72,6 +72,8 @@ TEST_F(SearchFamilyTest, CreateDropListIndex) {
EXPECT_EQ(Run({"ft.dropindex", "idx-2"}), "OK");
EXPECT_THAT(Run({"ft._list"}).GetVec(), testing::UnorderedElementsAre("idx-1", "idx-3"));
EXPECT_THAT(Run({"ft.create", "idx-1"}), ErrArg("Index already exists"));
EXPECT_THAT(Run({"ft.dropindex", "idx-100"}), ErrArg("Unknown Index name"));
EXPECT_EQ(Run({"ft.dropindex", "idx-1"}), "OK");