mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2024-12-14 11:58:02 +00:00
parent
5df0d92c44
commit
dc853fe4bd
7 changed files with 260 additions and 41 deletions
|
@ -3,6 +3,6 @@ gen_bison(parser)
|
|||
|
||||
cur_gen_dir(gen_dir)
|
||||
|
||||
add_library(query_parser ast_expr.cc query_driver.cc ${gen_dir}/parser.cc ${gen_dir}/lexer.cc)
|
||||
add_library(query_parser base.cc ast_expr.cc query_driver.cc ${gen_dir}/parser.cc ${gen_dir}/lexer.cc)
|
||||
target_link_libraries(query_parser base absl::strings TRDP::reflex)
|
||||
cxx_test(search_parser_test query_parser LABELS DFLY)
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
#include "core/search/ast_expr.h"
|
||||
|
||||
#include <absl/strings/numbers.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <regex>
|
||||
|
||||
|
@ -15,15 +17,17 @@ AstTermNode::AstTermNode(std::string term)
|
|||
: term_{move(term)}, pattern_{"\\b" + term_ + "\\b", std::regex::icase} {
|
||||
}
|
||||
|
||||
bool AstTermNode::Check(string_view input) const {
|
||||
return regex_search(input.begin(), input.begin() + input.size(), pattern_);
|
||||
bool AstTermNode::Check(SearchInput input) const {
|
||||
return input.Check([this](string_view str) {
|
||||
return regex_search(str.begin(), str.begin() + str.size(), pattern_);
|
||||
});
|
||||
}
|
||||
|
||||
string AstTermNode::Debug() const {
|
||||
return "term{" + term_ + "}";
|
||||
}
|
||||
|
||||
bool AstNegateNode::Check(string_view input) const {
|
||||
bool AstNegateNode::Check(SearchInput input) const {
|
||||
return !node_->Check(input);
|
||||
}
|
||||
|
||||
|
@ -31,7 +35,7 @@ string AstNegateNode::Debug() const {
|
|||
return "not{" + node_->Debug() + "}";
|
||||
}
|
||||
|
||||
bool AstLogicalNode::Check(string_view input) const {
|
||||
bool AstLogicalNode::Check(SearchInput input) const {
|
||||
return op_ == kOr ? (l_->Check(input) || r_->Check(input))
|
||||
: (l_->Check(input) && r_->Check(input));
|
||||
}
|
||||
|
@ -41,4 +45,25 @@ string AstLogicalNode::Debug() const {
|
|||
return op + "{" + l_->Debug() + "," + r_->Debug() + "}";
|
||||
}
|
||||
|
||||
// Re-targets the input at this node's field and lets the subtree decide the match.
bool AstFieldNode::Check(SearchInput input) const {
  // Same accessor as `input`, but with `field_` as the active field.
  const SearchInput field_input{input, field_};
  return node_->Check(field_input);
}
|
||||
|
||||
// Renders the node as `field:<name>{<subtree debug string>}`.
string AstFieldNode::Debug() const {
  string repr = "field:";
  repr += field_;
  repr += "{";
  repr += node_->Debug();
  repr += "}";
  return repr;
}
|
||||
|
||||
// Matches when a value handed over by the input parses as an integer that lies
// inside the closed interval [l_, r_]. Values that fail to parse never match.
bool AstRangeNode::Check(SearchInput input) const {
  auto in_range = [this](string_view str) {
    int64_t parsed = 0;
    if (absl::SimpleAtoi(str, &parsed))
      return parsed >= l_ && parsed <= r_;
    return false;
  };
  return input.Check(in_range);
}
|
||||
|
||||
// Renders the node as `range{<l> <r>}`.
string AstRangeNode::Debug() const {
  string repr = "range{";
  repr += to_string(l_);
  repr += " ";
  repr += to_string(r_);
  repr += "}";
  return repr;
}
|
||||
|
||||
} // namespace dfly::search
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
#include <regex>
|
||||
#include <vector>
|
||||
|
||||
#include "core/search/base.h"
|
||||
|
||||
namespace dfly {
|
||||
|
||||
namespace search {
|
||||
|
@ -21,7 +23,7 @@ class AstNode {
|
|||
virtual ~AstNode() = default;
|
||||
|
||||
// Check if this input is matched by the node.
|
||||
virtual bool Check(std::string_view input) const = 0;
|
||||
virtual bool Check(SearchInput) const = 0;
|
||||
|
||||
// Debug print node.
|
||||
virtual std::string Debug() const = 0;
|
||||
|
@ -38,7 +40,7 @@ template <typename T, typename... Ts> AstExpr MakeExpr(Ts&&... ts) {
|
|||
class AstTermNode : public AstNode {
|
||||
public:
|
||||
AstTermNode(std::string term);
|
||||
virtual bool Check(std::string_view input) const;
|
||||
virtual bool Check(SearchInput) const;
|
||||
virtual std::string Debug() const;
|
||||
|
||||
private:
|
||||
|
@ -46,19 +48,19 @@ class AstTermNode : public AstNode {
|
|||
std::regex pattern_;
|
||||
};
|
||||
|
||||
// Ast negation node, matches only if its sub node didn't match.
|
||||
// Ast negation node, matches only if its subtree didn't match.
|
||||
class AstNegateNode : public AstNode {
|
||||
public:
|
||||
AstNegateNode(NodePtr node) : node_{node} {
|
||||
}
|
||||
virtual bool Check(std::string_view input) const;
|
||||
virtual std::string Debug() const;
|
||||
bool Check(SearchInput) const override;
|
||||
std::string Debug() const override;
|
||||
|
||||
private:
|
||||
NodePtr node_;
|
||||
};
|
||||
|
||||
// Ast logical operation node, matches only if sub nodes match
|
||||
// Ast logical operation node, matches only if subtrees match
|
||||
// with respect to the logical operation (and/or).
|
||||
class AstLogicalNode : public AstNode {
|
||||
public:
|
||||
|
@ -69,14 +71,41 @@ class AstLogicalNode : public AstNode {
|
|||
|
||||
AstLogicalNode(NodePtr l, NodePtr r, Op op) : l_{l}, r_{r}, op_{op} {
|
||||
}
|
||||
virtual bool Check(std::string_view input) const;
|
||||
virtual std::string Debug() const;
|
||||
bool Check(SearchInput) const override;
|
||||
std::string Debug() const override;
|
||||
|
||||
private:
|
||||
NodePtr l_, r_;
|
||||
Op op_;
|
||||
};
|
||||
|
||||
// Ast field node, selects a field from the input for its subtree.
|
||||
class AstFieldNode : public AstNode {
|
||||
public:
|
||||
AstFieldNode(std::string field, NodePtr node) : field_{field.substr(1)}, node_{node} {
|
||||
}
|
||||
|
||||
bool Check(SearchInput) const override;
|
||||
std::string Debug() const override;
|
||||
|
||||
private:
|
||||
std::string field_;
|
||||
NodePtr node_;
|
||||
};
|
||||
|
||||
// Ast range node, checks if input is inside int range
|
||||
class AstRangeNode : public AstNode {
|
||||
public:
|
||||
AstRangeNode(int64_t l, int64_t r) : l_{l}, r_{r} {
|
||||
}
|
||||
|
||||
bool Check(SearchInput) const override;
|
||||
std::string Debug() const override;
|
||||
|
||||
private:
|
||||
int64_t l_, r_;
|
||||
};
|
||||
|
||||
} // namespace search
|
||||
} // namespace dfly
|
||||
|
||||
|
|
12
src/core/search/base.cc
Normal file
12
src/core/search/base.cc
Normal file
|
@ -0,0 +1,12 @@
|
|||
// Copyright 2023, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#include "core/search/base.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <regex>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace dfly::search {} // namespace dfly::search
|
43
src/core/search/base.h
Normal file
43
src/core/search/base.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <regex>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "core/core_types.h"
|
||||
#include "core/string_map.h"
|
||||
|
||||
namespace dfly::search {
|
||||
|
||||
// Interface for accessing hashset values with different data structures underneath.
struct HSetAccessor {
  // Callback that's supplied with field values; its bool result is the match verdict
  // for the value it was given.
  using FieldConsumer = std::function<bool(std::string_view)>;

  // This is a polymorphic base: a virtual destructor keeps destruction through an
  // HSetAccessor pointer well-defined.
  virtual ~HSetAccessor() = default;

  // Feeds field value(s) to `f` and returns the overall verdict.
  // `active_field` names the field to inspect. SearchInput passes an empty view when
  // no field was selected; how that case is handled is up to the implementation.
  virtual bool Check(FieldConsumer f, std::string_view active_field) const = 0;
};
|
||||
|
||||
// Wrapper around hashset accessor and optional active field.
|
||||
struct SearchInput {
|
||||
SearchInput(const HSetAccessor* hset, std::string_view active_field = {})
|
||||
: hset_{hset}, active_field_{active_field} {
|
||||
}
|
||||
|
||||
SearchInput(const SearchInput& base, std::string_view active_field)
|
||||
: hset_{base.hset_}, active_field_{active_field} {
|
||||
}
|
||||
|
||||
bool Check(HSetAccessor::FieldConsumer f) {
|
||||
return hset_->Check(move(f), active_field_);
|
||||
}
|
||||
|
||||
private:
|
||||
const HSetAccessor* hset_;
|
||||
std::string_view active_field_;
|
||||
};
|
||||
|
||||
} // namespace dfly::search
|
|
@ -63,7 +63,7 @@ using namespace std;
|
|||
%precedence LPAREN RPAREN
|
||||
|
||||
%token <int64_t> INT64 "int64"
|
||||
%nterm <AstExpr> final_query filter search_expr field_filter field_cond range_value term_list opt_neg_term
|
||||
%nterm <AstExpr> final_query filter search_expr field_cond field_cond_expr
|
||||
|
||||
%printer { yyo << $$; } <*>;
|
||||
|
||||
|
@ -76,26 +76,25 @@ filter:
|
|||
search_expr { $$ = $1; }
|
||||
|
||||
search_expr:
|
||||
LPAREN search_expr RPAREN { $$ = $2; }
|
||||
| search_expr search_expr %prec AND_OP { $$ = MakeExpr<AstLogicalNode>($1, $2, AstLogicalNode::kAnd); };
|
||||
| search_expr OR_OP search_expr { $$ = MakeExpr<AstLogicalNode>($1, $3, AstLogicalNode::kOr); }
|
||||
| NOT_OP search_expr { $$ = MakeExpr<AstNegateNode>($2); };
|
||||
| TERM { $$ = MakeExpr<AstTermNode>($1); }
|
||||
| field_filter;
|
||||
LPAREN search_expr RPAREN { $$ = $2; }
|
||||
| search_expr search_expr %prec AND_OP { $$ = MakeExpr<AstLogicalNode>($1, $2, AstLogicalNode::kAnd); }
|
||||
| search_expr OR_OP search_expr { $$ = MakeExpr<AstLogicalNode>($1, $3, AstLogicalNode::kOr); }
|
||||
| NOT_OP search_expr { $$ = MakeExpr<AstNegateNode>($2); }
|
||||
| TERM { $$ = MakeExpr<AstTermNode>($1); }
|
||||
| FIELD COLON field_cond { $$ = MakeExpr<AstFieldNode>($1, $3); }
|
||||
|
||||
field_filter:
|
||||
FIELD COLON field_cond { $$ = AstExpr{}; }
|
||||
|
||||
field_cond: term_list | range_value
|
||||
range_value: LBRACKET INT64 INT64 RBRACKET { $$ = AstExpr{}; }
|
||||
|
||||
term_list:
|
||||
opt_neg_term |
|
||||
LPAREN term_list opt_neg_term RPAREN { };
|
||||
|
||||
opt_neg_term:
|
||||
TERM { } | NOT_OP TERM { $$ = AstExpr{}; };
|
||||
field_cond:
|
||||
TERM { $$ = MakeExpr<AstTermNode>($1); }
|
||||
| NOT_OP field_cond { $$ = MakeExpr<AstNegateNode>($2); }
|
||||
| LPAREN field_cond_expr RPAREN { $$ = $2; }
|
||||
| LBRACKET INT64 INT64 RBRACKET { $$ = MakeExpr<AstRangeNode>($2, $3); }
|
||||
|
||||
field_cond_expr:
|
||||
LPAREN field_cond_expr RPAREN { $$ = $2; }
|
||||
| field_cond_expr field_cond_expr %prec AND_OP { $$ = MakeExpr<AstLogicalNode>($1, $2, AstLogicalNode::kAnd); }
|
||||
| field_cond_expr OR_OP field_cond_expr { $$ = MakeExpr<AstLogicalNode>($1, $3, AstLogicalNode::kOr); }
|
||||
| NOT_OP field_cond_expr { $$ = MakeExpr<AstNegateNode>($2); };
|
||||
| TERM { $$ = MakeExpr<AstTermNode>($1); }
|
||||
%%
|
||||
|
||||
void
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include "base/gtest.h"
|
||||
#include "base/logging.h"
|
||||
#include "core/search/base.h"
|
||||
#include "core/search/query_driver.h"
|
||||
|
||||
namespace dfly {
|
||||
|
@ -38,7 +39,7 @@ class SearchParserTest : public ::testing::Test {
|
|||
expr_ = query_driver_.Get();
|
||||
}
|
||||
|
||||
bool Check(string_view input) const {
|
||||
bool Check(SearchInput input) const {
|
||||
return expr_->Check(input);
|
||||
}
|
||||
|
||||
|
@ -51,6 +52,35 @@ class SearchParserTest : public ::testing::Test {
|
|||
QueryDriver query_driver_;
|
||||
};
|
||||
|
||||
class MockedHSetAccessor : public HSetAccessor {
|
||||
public:
|
||||
using Map = std::unordered_map<std::string, std::string>;
|
||||
|
||||
MockedHSetAccessor() = default;
|
||||
MockedHSetAccessor(std::string test_field) : hset_{{"field", test_field}} {
|
||||
}
|
||||
|
||||
bool Check(HSetAccessor::FieldConsumer f, string_view active_field) const override {
|
||||
if (!active_field.empty()) {
|
||||
auto it = hset_.find(string{active_field});
|
||||
return f(it != hset_.end() ? it->second : "");
|
||||
} else {
|
||||
for (const auto& [k, v] : hset_) {
|
||||
if (f(v))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void Set(Map hset) {
|
||||
hset_ = hset;
|
||||
}
|
||||
|
||||
private:
|
||||
Map hset_{};
|
||||
};
|
||||
|
||||
// Tokens are not assignable, so we cannot reuse them. This macro reduces the boilerplate.
|
||||
#define NEXT_EQ(tok_enum, type, val) \
|
||||
{ \
|
||||
|
@ -75,16 +105,20 @@ class SearchParserTest : public ::testing::Test {
|
|||
ASSERT_TRUE(caught); \
|
||||
}
|
||||
|
||||
#define CHECK_ALL(...) \
|
||||
{ \
|
||||
for (auto input : {__VA_ARGS__}) \
|
||||
EXPECT_TRUE(Check(input)) << input << " failed on " << DebugExpr(); \
|
||||
#define CHECK_ALL(...) \
|
||||
{ \
|
||||
for (auto str : {__VA_ARGS__}) { \
|
||||
MockedHSetAccessor hset{str}; \
|
||||
EXPECT_TRUE(Check(SearchInput{&hset})) << str << " failed on " << DebugExpr(); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHECK_NONE(...) \
|
||||
{ \
|
||||
for (auto input : {__VA_ARGS__}) \
|
||||
EXPECT_FALSE(Check(input)) << input << " failed on " << DebugExpr(); \
|
||||
#define CHECK_NONE(...) \
|
||||
{ \
|
||||
for (auto str : {__VA_ARGS__}) { \
|
||||
MockedHSetAccessor hset{str}; \
|
||||
EXPECT_FALSE(Check(SearchInput{&hset})) << str << " failed on " << DebugExpr(); \
|
||||
} \
|
||||
}
|
||||
|
||||
TEST_F(SearchParserTest, Scanner) {
|
||||
|
@ -201,6 +235,83 @@ TEST_F(SearchParserTest, CheckParenthesisPriority) {
|
|||
CHECK_NONE("wrong", "foo bar baz", "foo rab zab", "foo bar what", "foo rab foo");
|
||||
}
|
||||
|
||||
// Every field filter of the query has to match its own field.
TEST_F(SearchParserTest, MatchField) {
  ParseExpr("@f1:foo @f2:bar @f3:baz");

  // `input` only points at `hset`, so each Set() below is visible through it.
  MockedHSetAccessor hset;
  SearchInput input(&hset);

  // All three fields carry the expected term.
  hset.Set({{"f1", "foo"}, {"f2", "bar"}, {"f3", "baz"}});
  EXPECT_TRUE(Check(input));

  // Only the last field is wrong.
  hset.Set({{"f1", "foo"}, {"f2", "bar"}, {"f3", "last is wrong"}});
  EXPECT_FALSE(Check(input));

  // No field matches.
  hset.Set({{"f1", "its"}, {"f2", "totally"}, {"f3", "wrong"}});
  EXPECT_FALSE(Check(input));

  // f3 is missing altogether.
  hset.Set({{"f1", "im foo but its only me and"}, {"f2", "bar"}});
  EXPECT_FALSE(Check(input));

  // Empty hashset.
  hset.Set({});
  EXPECT_FALSE(Check(input));
}
|
||||
|
||||
// Range filters are inclusive on both ends and must hold for every listed field.
TEST_F(SearchParserTest, MatchRange) {
  ParseExpr("@f1:[1 10] @f2:[50 100]");

  // `input` only points at `hset`, so each Set() below is visible through it.
  MockedHSetAccessor hset;
  SearchInput input(&hset);

  // Inside the first range, on the lower bound of the second.
  hset.Set({{"f1", "5"}, {"f2", "50"}});
  EXPECT_TRUE(Check(input));

  // Lower bound of the first range, upper bound of the second.
  hset.Set({{"f1", "1"}, {"f2", "100"}});
  EXPECT_TRUE(Check(input));

  // Upper bound of the first range, lower bound of the second.
  hset.Set({{"f1", "10"}, {"f2", "50"}});
  EXPECT_TRUE(Check(input));

  // One past the upper bound / one below the lower bound.
  hset.Set({{"f1", "11"}, {"f2", "49"}});
  EXPECT_FALSE(Check(input));

  // One below the lower bound / one past the upper bound.
  hset.Set({{"f1", "0"}, {"f2", "101"}});
  EXPECT_FALSE(Check(input));
}
|
||||
|
||||
// Field filters may contain whole sub-expressions: or, implicit and, negation.
TEST_F(SearchParserTest, CheckExprInField) {
  ParseExpr("@f1:(a|b) @f2:(c d) @f3:-e");

  // `input` only points at `hset`, so each Set() below is visible through it.
  MockedHSetAccessor hset;
  SearchInput input(&hset);

  // f1 has a, f2 has both c and d, f3 has no standalone e.
  hset.Set({{"f1", "a"}, {"f2", "c and d"}, {"f3", "right"}});
  EXPECT_TRUE(Check(input));

  // f1 has b instead, f2 has the terms in the other order.
  hset.Set({{"f1", "b"}, {"f2", "d and c"}, {"f3", "ok"}});
  EXPECT_TRUE(Check(input));

  // f1 has neither a nor b, f2 lacks c.
  hset.Set({{"f1", "none"}, {"f2", "only d"}, {"f3", "ok"}});
  EXPECT_FALSE(Check(input));

  // f3 contains the negated term.
  hset.Set({{"f1", "b"}, {"f2", "d and c"}, {"f3", "it has an e"}});
  EXPECT_FALSE(Check(input)) << DebugExpr();

  // Nested expression with a negated group inside a field filter.
  ParseExpr({"@f1:(a (b | c) -(d | e)) @f2:-(a|b)"});

  // f1: a, b, neither d nor e; f2: neither a nor b.
  hset.Set({{"f1", "a b w"}, {"f2", "c"}});
  EXPECT_TRUE(Check(input));

  // f1 contains the excluded d.
  hset.Set({{"f1", "a b d"}, {"f2", "c"}});
  EXPECT_FALSE(Check(input));

  // f2 contains the excluded a.
  hset.Set({{"f1", "a b w"}, {"f2", "a"}});
  EXPECT_FALSE(Check(input));

  // f1 has neither b nor c.
  hset.Set({{"f1", "a w"}, {"f2", "c"}});
  EXPECT_FALSE(Check(input));
}
|
||||
|
||||
} // namespace search
|
||||
|
||||
} // namespace dfly
|
||||
|
|
Loading…
Reference in a new issue