mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2024-12-14 11:58:02 +00:00
chore: support more token types in the lexer (#1134)
1. Support integers 2. Support string literals 3. Add more test coverage. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
parent
8749d736dd
commit
ce5db032fc
7 changed files with 91 additions and 28 deletions
|
@ -6,5 +6,5 @@ gen_bison(parser)
|
|||
cur_gen_dir(gen_dir)
|
||||
|
||||
add_library(query_parser query_driver.cc ${gen_dir}/parser.cc ${gen_dir}/lexer.cc)
|
||||
target_link_libraries(query_parser glog)
|
||||
target_link_libraries(query_parser base absl::strings)
|
||||
cxx_test(search_parser_test query_parser)
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
|
||||
/* It seems that flex does not have Unicode support.
|
||||
TODO: consider https://en.wikipedia.org/wiki/RE/flex in the future.
|
||||
*/
|
||||
%{
|
||||
#include <climits>
|
||||
#include <absl/strings/escaping.h>
|
||||
#include <absl/strings/numbers.h>
|
||||
#include "base/logging.h"
|
||||
#include "core/search/query_driver.h"
|
||||
|
||||
// Define main lexer function. QueryDriver is the shared state between scanner and parser
|
||||
|
@ -15,12 +21,17 @@
|
|||
%{
|
||||
// A number symbol corresponding to the value in S.
|
||||
using dfly::search::Parser;
|
||||
using namespace std;
|
||||
|
||||
Parser::symbol_type make_NUMBER (const std::string &s, const Parser::location_type& loc);
|
||||
Parser::symbol_type make_INT64 (string_view, const Parser::location_type& loc);
|
||||
Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc);
|
||||
%}
|
||||
|
||||
int [0-9]+
|
||||
blank [ \t\r]
|
||||
dq \"
|
||||
esc_chars ['"\?\\abfnrtv]
|
||||
esc_seq \\{esc_chars}
|
||||
str_char ([^"]|{esc_seq})
|
||||
|
||||
%{
|
||||
// Code run each time a pattern is matched.
|
||||
|
@ -43,18 +54,34 @@ blank [ \t\r]
|
|||
"(" return Parser::make_LPAREN (loc);
|
||||
")" return Parser::make_RPAREN (loc);
|
||||
|
||||
{int} return make_NUMBER (yytext, loc);
|
||||
[^ \t\r]+ return Parser::make_TERM (yytext, loc);
|
||||
-?[0-9]+ return make_INT64(Matched(), loc);
|
||||
|
||||
{dq}{str_char}*{dq} return make_StringLit(string_view{YYText(), size_t(YYLeng())}, loc);
|
||||
|
||||
[[:alnum:]]+ return Parser::make_TERM(Matched(), loc);
|
||||
|
||||
<<EOF>> return Parser::make_YYEOF(loc);
|
||||
%%
|
||||
|
||||
Parser::symbol_type
|
||||
make_NUMBER (const std::string &s, const Parser::location_type& loc)
|
||||
make_INT64 (string_view str, const Parser::location_type& loc)
|
||||
{
|
||||
errno = 0;
|
||||
long n = strtol (s.c_str(), NULL, 10);
|
||||
if (! (INT_MIN <= n && n <= INT_MAX && errno != ERANGE))
|
||||
throw Parser::syntax_error (loc, "integer is out of range: " + s);
|
||||
return Parser::make_NUMBER ((int) n, loc);
|
||||
int64_t val = 0;
|
||||
if (!absl::SimpleAtoi(str, &val))
|
||||
throw Parser::syntax_error (loc, "not an integer or out of range: " + string(str));
|
||||
|
||||
return Parser::make_INT64(val, loc);
|
||||
}
|
||||
|
||||
// Builds a TERM token from a double-quoted string literal.
//   src - the matched text, including the surrounding double quotes.
//   loc - source location attached to the token or to the thrown error.
// Returns Parser::make_TERM with the unescaped contents of the literal.
// Throws Parser::syntax_error if absl::CUnescape rejects the contents.
Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc) {
|
||||
// The literal must at least contain the opening and closing quote.
DCHECK_GE(src.size(), 2u);
|
||||
|
||||
// Strip the first and the last character (the surrounding quotes).
|
||||
src.remove_prefix(1);
|
||||
src.remove_suffix(1);
|
||||
string res;
|
||||
// Decode escape sequences into res; on failure report the quote-less text.
if (!absl::CUnescape(src, &res)) {
|
||||
throw Parser::syntax_error (loc, "bad escaped string: " + string(src));
|
||||
}
|
||||
return Parser::make_TERM(res, loc);
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
|
||||
%token YYEOF
|
||||
%token <std::string> TERM "term"
|
||||
%token <int> NUMBER "number"
|
||||
%token <int64_t> INT64 "int64"
|
||||
%nterm <int> bool_expr
|
||||
|
||||
%printer { yyo << $$; } <*>;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2023, Roman Gershman. All rights reserved.
|
||||
// Copyright 2023, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
// Copyright 2023, Roman Gershman. All rights reserved.
|
||||
// Copyright 2023, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
|
||||
#include "core/search/parser.hh"
|
||||
#include "core/search/scanner.h"
|
||||
|
@ -22,9 +23,19 @@ class QueryDriver {
|
|||
return scanner_.get();
|
||||
}
|
||||
|
||||
// Sets the text to be scanned: stores str in the owned istr_ stream and
// switches the scanner's input to that stream.
void SetInput(const std::string& str) {
|
||||
istr_.str(str);
|
||||
scanner()->switch_streams(&istr_);
|
||||
}
|
||||
|
||||
// Returns the next token by calling the scanner's ParserLex with this driver.
Parser::symbol_type Lex() {
|
||||
return scanner()->ParserLex(*this);
|
||||
}
|
||||
|
||||
Parser::location_type location;
|
||||
|
||||
private:
|
||||
std::istringstream istr_;
|
||||
std::unique_ptr<Scanner> scanner_;
|
||||
};
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2023, Roman Gershman. All rights reserved.
|
||||
// Copyright 2023, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
|
@ -22,8 +22,9 @@ class Scanner : public yyFlexLexer {
|
|||
|
||||
Parser::symbol_type ParserLex(QueryDriver& drv);
|
||||
|
||||
std::string matched() {
|
||||
return yytext;
|
||||
private:
|
||||
std::string Matched() const {
|
||||
return std::string(YYText(), YYLeng());
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
// Copyright 2023, Roman Gershman. All rights reserved.
|
||||
// Copyright 2023, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#include "base/gtest.h"
|
||||
#include "base/logging.h"
|
||||
#include "core/search/query_driver.h"
|
||||
|
||||
namespace dfly {
|
||||
|
@ -12,30 +14,52 @@ using namespace std;
|
|||
class SearchParserTest : public ::testing::Test {
|
||||
protected:
|
||||
SearchParserTest() {
|
||||
// query_driver_.scanner()->set_debug(1);
|
||||
}
|
||||
|
||||
void SetInput(const std::string& str) {
|
||||
istr_.str(str);
|
||||
query_driver_.scanner()->switch_streams(&istr_);
|
||||
query_driver_.SetInput(str);
|
||||
}
|
||||
|
||||
Parser::symbol_type Lex() {
|
||||
return query_driver_.scanner()->ParserLex(query_driver_);
|
||||
return query_driver_.Lex();
|
||||
}
|
||||
|
||||
QueryDriver query_driver_;
|
||||
|
||||
std::istringstream istr_;
|
||||
};
|
||||
|
||||
// Tokens are not assignable, so we cannot reuse them. These macros reduce the boilerplate.
|
||||
// Lexes the next token and expects (EXPECT_EQ) that its type is
// Parser::token::tok_enum and that its value, read as `type`, equals val.
#define NEXT_EQ(tok_enum, type, val) \
|
||||
{ \
|
||||
auto tok = Lex(); \
|
||||
EXPECT_EQ(tok.type_get(), Parser::token::tok_enum); \
|
||||
EXPECT_EQ(val, tok.value.as<type>()); \
|
||||
}
|
||||
|
||||
// Lexes the next token and asserts (ASSERT_EQ) that its type is
// Parser::token::tok_enum; the token's value is not inspected.
#define NEXT_TOK(tok_enum) \
|
||||
{ \
|
||||
auto tok = Lex(); \
|
||||
ASSERT_EQ(tok.type_get(), Parser::token::tok_enum); \
|
||||
}
|
||||
|
||||
TEST_F(SearchParserTest, Scanner) {
|
||||
SetInput("ab cd");
|
||||
Parser::symbol_type tok = Lex();
|
||||
|
||||
// 3.5.1 does not have name() method.
|
||||
// EXPECT_STREQ("term", tok.name());
|
||||
EXPECT_EQ(tok.type_get(), Parser::token::TOK_TERM);
|
||||
EXPECT_EQ("ab", tok.value.as<string>());
|
||||
|
||||
NEXT_EQ(TOK_TERM, string, "ab");
|
||||
NEXT_EQ(TOK_TERM, string, "cd");
|
||||
NEXT_TOK(TOK_YYEOF);
|
||||
|
||||
SetInput("(5a 6) ");
|
||||
|
||||
NEXT_TOK(TOK_LPAREN);
|
||||
NEXT_EQ(TOK_TERM, string, "5a");
|
||||
NEXT_EQ(TOK_INT64, int64_t, 6);
|
||||
NEXT_TOK(TOK_RPAREN);
|
||||
|
||||
SetInput(R"( "hello\"world" )");
|
||||
NEXT_EQ(TOK_TERM, string, R"(hello"world)");
|
||||
}
|
||||
|
||||
} // namespace search
|
||||
|
|
Loading…
Reference in a new issue