mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2024-12-15 17:51:06 +00:00
fix(search): Small improvements (#2248)
This commit is contained in:
parent
26512fdba4
commit
f17bfaf0ff
6 changed files with 61 additions and 26 deletions
|
@ -149,8 +149,11 @@ std::pair<size_t /*dim*/, VectorSimilarity> BaseVectorIndex::Info() const {
|
|||
return {dim_, sim_};
|
||||
}
|
||||
|
||||
FlatVectorIndex::FlatVectorIndex(size_t dim, VectorSimilarity sim, PMR_NS::memory_resource* mr)
|
||||
: BaseVectorIndex{dim, sim}, entries_{mr} {
|
||||
FlatVectorIndex::FlatVectorIndex(const SchemaField::VectorParams& params,
|
||||
PMR_NS::memory_resource* mr)
|
||||
: BaseVectorIndex{params.dim, params.sim}, entries_{mr} {
|
||||
DCHECK(!params.use_hnsw);
|
||||
entries_.reserve(params.capacity * params.dim);
|
||||
}
|
||||
|
||||
void FlatVectorIndex::Add(DocId id, DocumentAccessor* doc, string_view field) {
|
||||
|
@ -174,8 +177,10 @@ const float* FlatVectorIndex::Get(DocId doc) const {
|
|||
}
|
||||
|
||||
struct HnswlibAdapter {
|
||||
HnswlibAdapter(size_t dim, VectorSimilarity sim, size_t cap)
|
||||
: space_{MakeSpace(dim, sim)}, world_{GetSpacePtr(), cap} {
|
||||
HnswlibAdapter(const SchemaField::VectorParams& params)
|
||||
: space_{MakeSpace(params.dim, params.sim)}, world_{GetSpacePtr(), params.capacity,
|
||||
params.hnsw_m, 200,
|
||||
100, true} {
|
||||
}
|
||||
|
||||
void Add(float* data, DocId id) {
|
||||
|
@ -235,9 +240,9 @@ struct HnswlibAdapter {
|
|||
hnswlib::HierarchicalNSW<float> world_;
|
||||
};
|
||||
|
||||
HnswVectorIndex::HnswVectorIndex(size_t dim, VectorSimilarity sim, size_t capacity,
|
||||
PMR_NS::memory_resource*)
|
||||
: BaseVectorIndex{dim, sim}, adapter_{make_unique<HnswlibAdapter>(dim, sim, capacity)} {
|
||||
HnswVectorIndex::HnswVectorIndex(const SchemaField::VectorParams& params, PMR_NS::memory_resource*)
|
||||
: BaseVectorIndex{params.dim, params.sim}, adapter_{make_unique<HnswlibAdapter>(params)} {
|
||||
DCHECK(params.use_hnsw);
|
||||
// TODO: Patch hnswlib to allocate through the memory resource; the memory_resource* argument is currently unused
|
||||
}
|
||||
|
||||
|
|
|
@ -15,6 +15,9 @@
|
|||
#include "core/search/base.h"
|
||||
#include "core/search/compressed_sorted_set.h"
|
||||
|
||||
// TODO: move the core field definitions out of the big search.h header so it need not be included here
|
||||
#include "core/search/search.h"
|
||||
|
||||
namespace dfly::search {
|
||||
|
||||
// Index for integer fields.
|
||||
|
@ -104,7 +107,7 @@ struct BaseVectorIndex : public BaseIndex {
|
|||
// Index for vector fields.
|
||||
// Only supports lookup by id.
|
||||
struct FlatVectorIndex : public BaseVectorIndex {
|
||||
FlatVectorIndex(size_t dim, VectorSimilarity sim, PMR_NS::memory_resource* mr);
|
||||
FlatVectorIndex(const SchemaField::VectorParams& params, PMR_NS::memory_resource* mr);
|
||||
|
||||
void Add(DocId id, DocumentAccessor* doc, std::string_view field) override;
|
||||
void Remove(DocId id, DocumentAccessor* doc, std::string_view field) override;
|
||||
|
@ -118,7 +121,7 @@ struct FlatVectorIndex : public BaseVectorIndex {
|
|||
struct HnswlibAdapter;
|
||||
|
||||
struct HnswVectorIndex : public BaseVectorIndex {
|
||||
HnswVectorIndex(size_t dim, VectorSimilarity sim, size_t capacity, PMR_NS::memory_resource* mr);
|
||||
HnswVectorIndex(const SchemaField::VectorParams& params, PMR_NS::memory_resource* mr);
|
||||
~HnswVectorIndex();
|
||||
|
||||
void Add(DocId id, DocumentAccessor* doc, std::string_view field) override;
|
||||
|
|
|
@ -471,10 +471,9 @@ void FieldIndices::CreateIndices(PMR_NS::memory_resource* mr) {
|
|||
const auto& vparams = std::get<SchemaField::VectorParams>(field_info.special_params);
|
||||
|
||||
if (vparams.use_hnsw)
|
||||
vector_index =
|
||||
make_unique<HnswVectorIndex>(vparams.dim, vparams.sim, vparams.capacity, mr);
|
||||
vector_index = make_unique<HnswVectorIndex>(vparams, mr);
|
||||
else
|
||||
vector_index = make_unique<FlatVectorIndex>(vparams.dim, vparams.sim, mr);
|
||||
vector_index = make_unique<FlatVectorIndex>(vparams, mr);
|
||||
|
||||
indices_[field_ident] = std::move(vector_index);
|
||||
break;
|
||||
|
|
|
@ -31,7 +31,9 @@ struct SchemaField {
|
|||
|
||||
size_t dim = 0u; // dimension of knn vectors
|
||||
VectorSimilarity sim = VectorSimilarity::L2; // similarity type
|
||||
size_t capacity = 1000; // initial capacity for hnsw world
|
||||
size_t capacity = 1000; // initial capacity
|
||||
|
||||
size_t hnsw_m = 16;
|
||||
};
|
||||
|
||||
using ParamsVariant = std::variant<std::monostate, VectorParams>;
|
||||
|
|
|
@ -46,36 +46,39 @@ bool IsValidJsonPath(string_view path) {
|
|||
}
|
||||
|
||||
search::SchemaField::VectorParams ParseVectorParams(CmdArgParser* parser) {
|
||||
size_t dim = 0;
|
||||
auto sim = search::VectorSimilarity::L2;
|
||||
size_t capacity = 1000;
|
||||
search::SchemaField::VectorParams params{};
|
||||
|
||||
bool use_hnsw = parser->ToUpper().Switch("HNSW", true, "FLAT", false);
|
||||
params.use_hnsw = parser->ToUpper().Switch("HNSW", true, "FLAT", false);
|
||||
size_t num_args = parser->Next<size_t>();
|
||||
|
||||
for (size_t i = 0; i * 2 < num_args; i++) {
|
||||
parser->ToUpper();
|
||||
|
||||
if (parser->Check("DIM").ExpectTail(1)) {
|
||||
dim = parser->Next<size_t>();
|
||||
params.dim = parser->Next<size_t>();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (parser->Check("DISTANCE_METRIC").ExpectTail(1)) {
|
||||
sim = parser->Switch("L2", search::VectorSimilarity::L2, "COSINE",
|
||||
search::VectorSimilarity::COSINE);
|
||||
params.sim = parser->Switch("L2", search::VectorSimilarity::L2, "COSINE",
|
||||
search::VectorSimilarity::COSINE);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (parser->Check("INITIAL_CAP").ExpectTail(1)) {
|
||||
capacity = parser->Next<size_t>();
|
||||
params.capacity = parser->Next<size_t>();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (parser->Check("M").ExpectTail(1)) {
|
||||
params.hnsw_m = parser->Next<size_t>();
|
||||
continue;
|
||||
}
|
||||
|
||||
parser->Skip(2);
|
||||
}
|
||||
|
||||
return {use_hnsw, dim, sim, capacity};
|
||||
return params;
|
||||
}
|
||||
|
||||
optional<search::Schema> ParseSchemaOrReply(DocIndex::DataType type, CmdArgParser parser,
|
||||
|
@ -343,11 +346,32 @@ void SearchFamily::FtCreate(CmdArgList args, ConnectionContext* cntx) {
|
|||
if (auto err = parser.Error(); err)
|
||||
return (*cntx)->SendError(err->MakeReply());
|
||||
|
||||
cntx->transaction->Schedule();
|
||||
|
||||
// Check whether the index already exists: every shard that already holds it bumps the counter
|
||||
atomic_uint exists_cnt = 0;
|
||||
cntx->transaction->Execute(
|
||||
[idx_name, &exists_cnt](auto* tx, auto* es) {
|
||||
if (es->search_indices()->GetIndex(idx_name) != nullptr)
|
||||
exists_cnt.fetch_add(1, std::memory_order_relaxed);
|
||||
return OpStatus::OK;
|
||||
},
|
||||
false);
|
||||
|
||||
DCHECK(exists_cnt == 0u || exists_cnt == shard_set->size());
|
||||
|
||||
if (exists_cnt.load(memory_order_relaxed) > 0) {
|
||||
cntx->transaction->Conclude();
|
||||
return (*cntx)->SendError("Index already exists");
|
||||
}
|
||||
|
||||
auto idx_ptr = make_shared<DocIndex>(move(index));
|
||||
cntx->transaction->ScheduleSingleHop([idx_name, idx_ptr](auto* tx, auto* es) {
|
||||
es->search_indices()->InitIndex(tx->GetOpArgs(es), idx_name, idx_ptr);
|
||||
return OpStatus::OK;
|
||||
});
|
||||
cntx->transaction->Execute(
|
||||
[idx_name, idx_ptr](auto* tx, auto* es) {
|
||||
es->search_indices()->InitIndex(tx->GetOpArgs(es), idx_name, idx_ptr);
|
||||
return OpStatus::OK;
|
||||
},
|
||||
true);
|
||||
|
||||
(*cntx)->SendOk();
|
||||
}
|
||||
|
|
|
@ -72,6 +72,8 @@ TEST_F(SearchFamilyTest, CreateDropListIndex) {
|
|||
EXPECT_EQ(Run({"ft.dropindex", "idx-2"}), "OK");
|
||||
EXPECT_THAT(Run({"ft._list"}).GetVec(), testing::UnorderedElementsAre("idx-1", "idx-3"));
|
||||
|
||||
EXPECT_THAT(Run({"ft.create", "idx-1"}), ErrArg("Index already exists"));
|
||||
|
||||
EXPECT_THAT(Run({"ft.dropindex", "idx-100"}), ErrArg("Unknown Index name"));
|
||||
|
||||
EXPECT_EQ(Run({"ft.dropindex", "idx-1"}), "OK");
|
||||
|
|
Loading…
Reference in a new issue