1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-14 11:58:02 +00:00

Support fields in search mvp (#1184)

Field support for search mvp
This commit is contained in:
Vladislav 2023-05-07 01:02:08 +03:00 committed by GitHub
parent 5df0d92c44
commit dc853fe4bd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 260 additions and 41 deletions

View file

@ -3,6 +3,6 @@ gen_bison(parser)
cur_gen_dir(gen_dir)
add_library(query_parser ast_expr.cc query_driver.cc ${gen_dir}/parser.cc ${gen_dir}/lexer.cc)
add_library(query_parser base.cc ast_expr.cc query_driver.cc ${gen_dir}/parser.cc ${gen_dir}/lexer.cc)
target_link_libraries(query_parser base absl::strings TRDP::reflex)
cxx_test(search_parser_test query_parser LABELS DFLY)

View file

@ -4,6 +4,8 @@
#include "core/search/ast_expr.h"
#include <absl/strings/numbers.h>
#include <algorithm>
#include <regex>
@ -15,15 +17,17 @@ AstTermNode::AstTermNode(std::string term)
: term_{move(term)}, pattern_{"\\b" + term_ + "\\b", std::regex::icase} {
}
bool AstTermNode::Check(string_view input) const {
return regex_search(input.begin(), input.begin() + input.size(), pattern_);
bool AstTermNode::Check(SearchInput input) const {
return input.Check([this](string_view str) {
return regex_search(str.begin(), str.begin() + str.size(), pattern_);
});
}
string AstTermNode::Debug() const {
return "term{" + term_ + "}";
}
bool AstNegateNode::Check(string_view input) const {
bool AstNegateNode::Check(SearchInput input) const {
return !node_->Check(input);
}
@ -31,7 +35,7 @@ string AstNegateNode::Debug() const {
return "not{" + node_->Debug() + "}";
}
bool AstLogicalNode::Check(string_view input) const {
bool AstLogicalNode::Check(SearchInput input) const {
return op_ == kOr ? (l_->Check(input) || r_->Check(input))
: (l_->Check(input) && r_->Check(input));
}
@ -41,4 +45,25 @@ string AstLogicalNode::Debug() const {
return op + "{" + l_->Debug() + "," + r_->Debug() + "}";
}
bool AstFieldNode::Check(SearchInput input) const {
return node_->Check(SearchInput{input, field_});
}
string AstFieldNode::Debug() const {
return "field:" + field_ + "{" + node_->Debug() + "}";
}
bool AstRangeNode::Check(SearchInput input) const {
return input.Check([this](string_view str) {
int64_t v;
if (!absl::SimpleAtoi(str, &v))
return false;
return l_ <= v && v <= r_;
});
}
string AstRangeNode::Debug() const {
return "range{" + to_string(l_) + " " + to_string(r_) + "}";
}
} // namespace dfly::search

View file

@ -11,6 +11,8 @@
#include <regex>
#include <vector>
#include "core/search/base.h"
namespace dfly {
namespace search {
@ -21,7 +23,7 @@ class AstNode {
virtual ~AstNode() = default;
// Check if this input is matched by the node.
virtual bool Check(std::string_view input) const = 0;
virtual bool Check(SearchInput) const = 0;
// Debug print node.
virtual std::string Debug() const = 0;
@ -38,7 +40,7 @@ template <typename T, typename... Ts> AstExpr MakeExpr(Ts&&... ts) {
class AstTermNode : public AstNode {
public:
AstTermNode(std::string term);
virtual bool Check(std::string_view input) const;
virtual bool Check(SearchInput) const;
virtual std::string Debug() const;
private:
@ -46,19 +48,19 @@ class AstTermNode : public AstNode {
std::regex pattern_;
};
// Ast negation node, matches only if its sub node didn't match.
// Ast negation node, matches only if its subtree didn't match.
class AstNegateNode : public AstNode {
public:
AstNegateNode(NodePtr node) : node_{node} {
}
virtual bool Check(std::string_view input) const;
virtual std::string Debug() const;
bool Check(SearchInput) const override;
std::string Debug() const override;
private:
NodePtr node_;
};
// Ast logical operation node, matches only if sub nodes match
// Ast logical operation node, matches only if subtrees match
// in respect to logical operation (and/or).
class AstLogicalNode : public AstNode {
public:
@ -69,14 +71,41 @@ class AstLogicalNode : public AstNode {
AstLogicalNode(NodePtr l, NodePtr r, Op op) : l_{l}, r_{r}, op_{op} {
}
virtual bool Check(std::string_view input) const;
virtual std::string Debug() const;
bool Check(SearchInput) const override;
std::string Debug() const override;
private:
NodePtr l_, r_;
Op op_;
};
// Ast field node, selects a field from the input for its subtree.
class AstFieldNode : public AstNode {
public:
AstFieldNode(std::string field, NodePtr node) : field_{field.substr(1)}, node_{node} {
}
bool Check(SearchInput) const override;
std::string Debug() const override;
private:
std::string field_;
NodePtr node_;
};
// Ast range node, checks if input is inside int range
class AstRangeNode : public AstNode {
public:
AstRangeNode(int64_t l, int64_t r) : l_{l}, r_{r} {
}
bool Check(SearchInput) const override;
std::string Debug() const override;
private:
int64_t l_, r_;
};
} // namespace search
} // namespace dfly

12
src/core/search/base.cc Normal file
View file

@ -0,0 +1,12 @@
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "core/search/base.h"
#include <algorithm>
#include <regex>
using namespace std;
namespace dfly::search {} // namespace dfly::search

43
src/core/search/base.h Normal file
View file

@ -0,0 +1,43 @@
#pragma once
#include <algorithm>
#include <iostream>
#include <memory>
#include <ostream>
#include <regex>
#include <variant>
#include <vector>
#include "core/core_types.h"
#include "core/string_map.h"
namespace dfly::search {
// Interface for accessing hashset values with different data structures underneath.
struct HSetAccessor {
// Callback that's supplied with field values.
using FieldConsumer = std::function<bool(std::string_view)>;
virtual bool Check(FieldConsumer f, std::string_view active_field) const = 0;
};
// Wrapper around hashset accessor and optional active field.
struct SearchInput {
SearchInput(const HSetAccessor* hset, std::string_view active_field = {})
: hset_{hset}, active_field_{active_field} {
}
SearchInput(const SearchInput& base, std::string_view active_field)
: hset_{base.hset_}, active_field_{active_field} {
}
bool Check(HSetAccessor::FieldConsumer f) {
return hset_->Check(move(f), active_field_);
}
private:
const HSetAccessor* hset_;
std::string_view active_field_;
};
} // namespace dfly::search

View file

@ -63,7 +63,7 @@ using namespace std;
%precedence LPAREN RPAREN
%token <int64_t> INT64 "int64"
%nterm <AstExpr> final_query filter search_expr field_filter field_cond range_value term_list opt_neg_term
%nterm <AstExpr> final_query filter search_expr field_cond field_cond_expr
%printer { yyo << $$; } <*>;
@ -76,26 +76,25 @@ filter:
search_expr { $$ = $1; }
search_expr:
LPAREN search_expr RPAREN { $$ = $2; }
| search_expr search_expr %prec AND_OP { $$ = MakeExpr<AstLogicalNode>($1, $2, AstLogicalNode::kAnd); };
| search_expr OR_OP search_expr { $$ = MakeExpr<AstLogicalNode>($1, $3, AstLogicalNode::kOr); }
| NOT_OP search_expr { $$ = MakeExpr<AstNegateNode>($2); };
| TERM { $$ = MakeExpr<AstTermNode>($1); }
| field_filter;
LPAREN search_expr RPAREN { $$ = $2; }
| search_expr search_expr %prec AND_OP { $$ = MakeExpr<AstLogicalNode>($1, $2, AstLogicalNode::kAnd); }
| search_expr OR_OP search_expr { $$ = MakeExpr<AstLogicalNode>($1, $3, AstLogicalNode::kOr); }
| NOT_OP search_expr { $$ = MakeExpr<AstNegateNode>($2); }
| TERM { $$ = MakeExpr<AstTermNode>($1); }
| FIELD COLON field_cond { $$ = MakeExpr<AstFieldNode>($1, $3); }
field_filter:
FIELD COLON field_cond { $$ = AstExpr{}; }
field_cond: term_list | range_value
range_value: LBRACKET INT64 INT64 RBRACKET { $$ = AstExpr{}; }
term_list:
opt_neg_term |
LPAREN term_list opt_neg_term RPAREN { };
opt_neg_term:
TERM { } | NOT_OP TERM { $$ = AstExpr{}; };
field_cond:
TERM { $$ = MakeExpr<AstTermNode>($1); }
| NOT_OP field_cond { $$ = MakeExpr<AstNegateNode>($2); }
| LPAREN field_cond_expr RPAREN { $$ = $2; }
| LBRACKET INT64 INT64 RBRACKET { $$ = MakeExpr<AstRangeNode>($2, $3); }
field_cond_expr:
LPAREN field_cond_expr RPAREN { $$ = $2; }
| field_cond_expr field_cond_expr %prec AND_OP { $$ = MakeExpr<AstLogicalNode>($1, $2, AstLogicalNode::kAnd); }
| field_cond_expr OR_OP field_cond_expr { $$ = MakeExpr<AstLogicalNode>($1, $3, AstLogicalNode::kOr); }
| NOT_OP field_cond_expr { $$ = MakeExpr<AstNegateNode>($2); };
| TERM { $$ = MakeExpr<AstTermNode>($1); }
%%
void

View file

@ -4,6 +4,7 @@
#include "base/gtest.h"
#include "base/logging.h"
#include "core/search/base.h"
#include "core/search/query_driver.h"
namespace dfly {
@ -38,7 +39,7 @@ class SearchParserTest : public ::testing::Test {
expr_ = query_driver_.Get();
}
bool Check(string_view input) const {
bool Check(SearchInput input) const {
return expr_->Check(input);
}
@ -51,6 +52,35 @@ class SearchParserTest : public ::testing::Test {
QueryDriver query_driver_;
};
class MockedHSetAccessor : public HSetAccessor {
public:
using Map = std::unordered_map<std::string, std::string>;
MockedHSetAccessor() = default;
MockedHSetAccessor(std::string test_field) : hset_{{"field", test_field}} {
}
bool Check(HSetAccessor::FieldConsumer f, string_view active_field) const override {
if (!active_field.empty()) {
auto it = hset_.find(string{active_field});
return f(it != hset_.end() ? it->second : "");
} else {
for (const auto& [k, v] : hset_) {
if (f(v))
return true;
}
return false;
}
}
void Set(Map hset) {
hset_ = hset;
}
private:
Map hset_{};
};
// tokens are not assignable, so we can not reuse them. This macros reduce the boilerplate.
#define NEXT_EQ(tok_enum, type, val) \
{ \
@ -75,16 +105,20 @@ class SearchParserTest : public ::testing::Test {
ASSERT_TRUE(caught); \
}
#define CHECK_ALL(...) \
{ \
for (auto input : {__VA_ARGS__}) \
EXPECT_TRUE(Check(input)) << input << " failed on " << DebugExpr(); \
#define CHECK_ALL(...) \
{ \
for (auto str : {__VA_ARGS__}) { \
MockedHSetAccessor hset{str}; \
EXPECT_TRUE(Check(SearchInput{&hset})) << str << " failed on " << DebugExpr(); \
} \
}
#define CHECK_NONE(...) \
{ \
for (auto input : {__VA_ARGS__}) \
EXPECT_FALSE(Check(input)) << input << " failed on " << DebugExpr(); \
#define CHECK_NONE(...) \
{ \
for (auto str : {__VA_ARGS__}) { \
MockedHSetAccessor hset{str}; \
EXPECT_FALSE(Check(SearchInput{&hset})) << str << " failed on " << DebugExpr(); \
} \
}
TEST_F(SearchParserTest, Scanner) {
@ -201,6 +235,83 @@ TEST_F(SearchParserTest, CheckParenthesisPriority) {
CHECK_NONE("wrong", "foo bar baz", "foo rab zab", "foo bar what", "foo rab foo");
}
TEST_F(SearchParserTest, MatchField) {
ParseExpr("@f1:foo @f2:bar @f3:baz");
MockedHSetAccessor hset{};
SearchInput input{&hset};
hset.Set({{"f1", "foo"}, {"f2", "bar"}, {"f3", "baz"}});
EXPECT_TRUE(Check(input));
hset.Set({{"f1", "foo"}, {"f2", "bar"}, {"f3", "last is wrong"}});
EXPECT_FALSE(Check(input));
hset.Set({{"f1", "its"}, {"f2", "totally"}, {"f3", "wrong"}});
EXPECT_FALSE(Check(input));
hset.Set({{"f1", "im foo but its only me and"}, {"f2", "bar"}});
EXPECT_FALSE(Check(input));
hset.Set({});
EXPECT_FALSE(Check(input));
}
TEST_F(SearchParserTest, MatchRange) {
ParseExpr("@f1:[1 10] @f2:[50 100]");
MockedHSetAccessor hset{};
SearchInput input{&hset};
hset.Set({{"f1", "5"}, {"f2", "50"}});
EXPECT_TRUE(Check(input));
hset.Set({{"f1", "1"}, {"f2", "100"}});
EXPECT_TRUE(Check(input));
hset.Set({{"f1", "10"}, {"f2", "50"}});
EXPECT_TRUE(Check(input));
hset.Set({{"f1", "11"}, {"f2", "49"}});
EXPECT_FALSE(Check(input));
hset.Set({{"f1", "0"}, {"f2", "101"}});
EXPECT_FALSE(Check(input));
}
TEST_F(SearchParserTest, CheckExprInField) {
ParseExpr("@f1:(a|b) @f2:(c d) @f3:-e");
MockedHSetAccessor hset{};
SearchInput input{&hset};
hset.Set({{"f1", "a"}, {"f2", "c and d"}, {"f3", "right"}});
EXPECT_TRUE(Check(input));
hset.Set({{"f1", "b"}, {"f2", "d and c"}, {"f3", "ok"}});
EXPECT_TRUE(Check(input));
hset.Set({{"f1", "none"}, {"f2", "only d"}, {"f3", "ok"}});
EXPECT_FALSE(Check(input));
hset.Set({{"f1", "b"}, {"f2", "d and c"}, {"f3", "it has an e"}});
EXPECT_FALSE(Check(input)) << DebugExpr();
ParseExpr({"@f1:(a (b | c) -(d | e)) @f2:-(a|b)"});
hset.Set({{"f1", "a b w"}, {"f2", "c"}});
EXPECT_TRUE(Check(input));
hset.Set({{"f1", "a b d"}, {"f2", "c"}});
EXPECT_FALSE(Check(input));
hset.Set({{"f1", "a b w"}, {"f2", "a"}});
EXPECT_FALSE(Check(input));
hset.Set({{"f1", "a w"}, {"f2", "c"}});
EXPECT_FALSE(Check(input));
}
} // namespace search
} // namespace dfly