1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-14 11:58:02 +00:00

chore: support more token types in the lexer (#1134)

1. Support integers
2. Support string literals
3. Add more test coverage.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2023-04-25 14:57:24 +02:00 committed by GitHub
parent 8749d736dd
commit ce5db032fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 91 additions and 28 deletions

View file

@ -6,5 +6,5 @@ gen_bison(parser)
cur_gen_dir(gen_dir)
add_library(query_parser query_driver.cc ${gen_dir}/parser.cc ${gen_dir}/lexer.cc)
target_link_libraries(query_parser glog)
target_link_libraries(query_parser base absl::strings)
cxx_test(search_parser_test query_parser)

View file

@ -1,5 +1,11 @@
/* It seems that flex does not support Unicode.
TODO: consider switching to RE/flex (https://en.wikipedia.org/wiki/RE/flex) in the future.
*/
%{
#include <climits>
#include <absl/strings/escaping.h>
#include <absl/strings/numbers.h>
#include "base/logging.h"
#include "core/search/query_driver.h"
// Define main lexer function. QueryDriver is the shared state between scanner and parser
@ -15,12 +21,17 @@
%{
// A number symbol corresponding to the value in S.
using dfly::search::Parser;
using namespace std;
Parser::symbol_type make_NUMBER (const std::string &s, const Parser::location_type& loc);
Parser::symbol_type make_INT64 (string_view, const Parser::location_type& loc);
Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc);
%}
int [0-9]+
blank [ \t\r]
dq \"
esc_chars ['"\?\\abfnrtv]
esc_seq \\{esc_chars}
str_char ([^"]|{esc_seq})
%{
// Code run each time a pattern is matched.
@ -43,18 +54,34 @@ blank [ \t\r]
"(" return Parser::make_LPAREN (loc);
")" return Parser::make_RPAREN (loc);
{int} return make_NUMBER (yytext, loc);
[^ \t\r]+ return Parser::make_TERM (yytext, loc);
-?[0-9]+ return make_INT64(Matched(), loc);
{dq}{str_char}*{dq} return make_StringLit(string_view{YYText(), size_t(YYLeng())}, loc);
[[:alnum:]]+ return Parser::make_TERM(Matched(), loc);
<<EOF>> return Parser::make_YYEOF(loc);
%%
// NOTE(review): this span looks like a commit diff whose +/- markers were lost: the
// make_NUMBER signature and its strtol-based body appear to be the removed lines, and the
// make_INT64 signature with the absl::SimpleAtoi-based body the added ones — confirm
// against the repository before relying on the exact text.
//
// make_INT64 converts the matched text `str` into an int64_t and wraps it in an INT64
// token located at `loc`. A Parser::syntax_error carrying `loc` is thrown when
// absl::SimpleAtoi rejects the text.
Parser::symbol_type
make_NUMBER (const std::string &s, const Parser::location_type& loc)
make_INT64 (string_view str, const Parser::location_type& loc)
{
// strtol-based variant: errno is reset so that ERANGE can be detected after the call.
errno = 0;
long n = strtol (s.c_str(), NULL, 10);
// Rejects values that do not fit into an int or that strtol flagged as out of range.
if (! (INT_MIN <= n && n <= INT_MAX && errno != ERANGE))
throw Parser::syntax_error (loc, "integer is out of range: " + s);
return Parser::make_NUMBER ((int) n, loc);
// SimpleAtoi-based variant: a false return is reported as a syntax error.
int64_t val = 0;
if (!absl::SimpleAtoi(str, &val))
throw Parser::syntax_error (loc, "not an integer or out of range: " + string(str));
return Parser::make_INT64(val, loc);
}
// Builds a TERM token from a double-quoted string literal.
// `src` is the matched text including both delimiting quotes; the text between them is
// C-unescaped with absl::CUnescape. A Parser::syntax_error carrying `loc` is thrown when
// the unescaping fails.
Parser::symbol_type make_StringLit(string_view src, const Parser::location_type& loc) {
  // The opening and the closing quote must both be present.
  DCHECK_GE(src.size(), 2u);

  // Text between the quotes.
  const string_view body = src.substr(1, src.size() - 2);

  string unescaped;
  const bool ok = absl::CUnescape(body, &unescaped);
  if (ok)
    return Parser::make_TERM(unescaped, loc);

  throw Parser::syntax_error (loc, "bad escaped string: " + string(body));
}

View file

@ -44,7 +44,7 @@
%token YYEOF
%token <std::string> TERM "term"
%token <int> NUMBER "number"
%token <int64_t> INT64 "int64"
%nterm <int> bool_expr
%printer { yyo << $$; } <*>;

View file

@ -1,4 +1,4 @@
// Copyright 2023, Roman Gershman. All rights reserved.
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//

View file

@ -1,10 +1,11 @@
// Copyright 2023, Roman Gershman. All rights reserved.
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#include <memory>
#include <sstream>
#include "core/search/parser.hh"
#include "core/search/scanner.h"
@ -22,9 +23,19 @@ class QueryDriver {
return scanner_.get();
}
// Makes `str` the text that the scanner tokenizes from now on.
// The text is copied into the owned string stream, which is then handed to the scanner
// via switch_streams().
void SetInput(const std::string& str) {
  // str() replaces the buffered text but does not touch the stream's state flags, so
  // eofbit/failbit left over from a previously drained input are reset explicitly.
  // NOTE(review): whether the scanner inspects this stream's state depends on the flex
  // version in use — the clear() is harmless either way.
  istr_.clear();
  istr_.str(str);
  scanner()->switch_streams(&istr_);
}
// Returns the next token produced by the scanner; this driver is passed to the scanner
// as the argument of ParserLex().
Parser::symbol_type Lex() {
  auto* lexer = scanner();
  return lexer->ParserLex(*this);
}
Parser::location_type location;
private:
std::istringstream istr_;
std::unique_ptr<Scanner> scanner_;
};

View file

@ -1,4 +1,4 @@
// Copyright 2023, Roman Gershman. All rights reserved.
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
@ -22,8 +22,9 @@ class Scanner : public yyFlexLexer {
Parser::symbol_type ParserLex(QueryDriver& drv);
std::string matched() {
return yytext;
private:
std::string Matched() const {
return std::string(YYText(), YYLeng());
}
};

View file

@ -1,7 +1,9 @@
// Copyright 2023, Roman Gershman. All rights reserved.
// Copyright 2023, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "base/gtest.h"
#include "base/logging.h"
#include "core/search/query_driver.h"
namespace dfly {
@ -12,30 +14,52 @@ using namespace std;
// Test fixture that owns a QueryDriver, feeds it input strings and pulls tokens from it.
// NOTE(review): this span looks like a commit diff whose +/- markers were lost — SetInput
// and Lex each seem to contain both the removed body (driving the scanner directly through
// istr_) and the added body (delegating to QueryDriver). Confirm against the repository.
class SearchParserTest : public ::testing::Test {
protected:
SearchParserTest() {
// Uncomment to turn on scanner debug output.
// query_driver_.scanner()->set_debug(1);
}
// Makes `str` the text to be tokenized.
void SetInput(const std::string& str) {
istr_.str(str);
query_driver_.scanner()->switch_streams(&istr_);
query_driver_.SetInput(str);
}
// Returns the next token produced for the current input.
Parser::symbol_type Lex() {
return query_driver_.scanner()->ParserLex(query_driver_);
return query_driver_.Lex();
}
QueryDriver query_driver_;
std::istringstream istr_;
};
// Tokens are not assignable, so we cannot reuse them. These macros reduce the boilerplate.
// Pulls the next token via Lex(), requires its kind to be Parser::token::tok_enum and
// compares its semantic value, read as `type`, with `val`.
// The kind check is fatal (ASSERT_EQ): the value is read with as<type>(), which is only
// meaningful when the token really carries that type, so it must not be evaluated once
// the kind check has failed.
#define NEXT_EQ(tok_enum, type, val) \
{ \
auto tok = Lex(); \
ASSERT_EQ(tok.type_get(), Parser::token::tok_enum); \
EXPECT_EQ(val, tok.value.as<type>()); \
}
// Pulls the next token via Lex() and requires, fatally, that its kind is
// Parser::token::tok_enum. The token's value is not inspected.
#define NEXT_TOK(tok_enum) \
{ \
auto next_token = Lex(); \
ASSERT_EQ(next_token.type_get(), Parser::token::tok_enum); \
}
// Checks the token stream produced by the scanner for a few small inputs.
// NOTE(review): this span looks like a commit diff whose +/- markers were lost — the
// explicit `tok` checks right after the first SetInput() seem to be the removed lines and
// the NEXT_EQ/NEXT_TOK sequences the added ones. Confirm against the repository.
TEST_F(SearchParserTest, Scanner) {
// Two whitespace-separated words are expected as two TERM tokens followed by end of input.
SetInput("ab cd");
Parser::symbol_type tok = Lex();
// 3.5.1 does not have name() method.
// EXPECT_STREQ("term", tok.name());
EXPECT_EQ(tok.type_get(), Parser::token::TOK_TERM);
EXPECT_EQ("ab", tok.value.as<string>());
NEXT_EQ(TOK_TERM, string, "ab");
NEXT_EQ(TOK_TERM, string, "cd");
NEXT_TOK(TOK_YYEOF);
// Parentheses are separate tokens; "5a" is expected as a TERM and "6" as an INT64.
SetInput("(5a 6) ");
NEXT_TOK(TOK_LPAREN);
NEXT_EQ(TOK_TERM, string, "5a");
NEXT_EQ(TOK_INT64, int64_t, 6);
NEXT_TOK(TOK_RPAREN);
// A quoted literal with an escaped quote is expected as a TERM holding the unescaped text.
SetInput(R"( "hello\"world" )");
NEXT_EQ(TOK_TERM, string, R"(hello"world)");
}
} // namespace search