mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2024-12-14 11:58:02 +00:00
feat: introduce a skeleton for jsonpath parser (#2521)
Not finished yet; mostly boilerplate plus minimal bindings for the test. Motivation: as long as we rely on the jsoncons jsonpath implementation, we cannot change the json implementation either. The jsoncons jsonpath implementation is quite good, but it only works with the jsoncons json library. By implementing this ourselves we gain the freedom to change the json implementation in the future. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
parent
ed59a439d1
commit
4b4ff4688b
9 changed files with 290 additions and 0 deletions
|
@ -1,4 +1,6 @@
|
|||
add_subdirectory(search)
|
||||
add_subdirectory(json)
|
||||
|
||||
set(SEARCH_LIB query_parser)
|
||||
|
||||
add_library(dfly_core compact_object.cc dragonfly_core.cc extent_tree.cc
|
||||
|
|
10
src/core/json/CMakeLists.txt
Normal file
10
src/core/json/CMakeLists.txt
Normal file
|
@ -0,0 +1,10 @@
|
|||
# Produce the jsonpath scanner and parser sources from jsonpath_lexer.lex and
# jsonpath_grammar.y. gen_flex, gen_bison and cur_gen_dir are project helpers
# defined elsewhere; presumably cur_gen_dir stores the directory holding the
# generated files into gen_dir -- confirm against their definitions.
gen_flex(jsonpath_lexer)
gen_bison(jsonpath_grammar)
cur_gen_dir(gen_dir)

# The jsonpath library: hand-written driver/lexer glue plus the generated sources.
add_library(jsonpath
  lexer_impl.cc
  driver.cc
  ${gen_dir}/jsonpath_lexer.cc
  ${gen_dir}/jsonpath_grammar.cc
)
target_link_libraries(jsonpath base absl::strings TRDP::reflex)

# Unit test for the scanner/parser skeleton.
cxx_test(jsonpath_test jsonpath LABELS DFLY)
|
22
src/core/json/driver.cc
Normal file
22
src/core/json/driver.cc
Normal file
|
@ -0,0 +1,22 @@
|
|||
// Copyright 2024, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#include "src/core/json/driver.h"
|
||||
|
||||
#include "src/core/json/lexer_impl.h"
|
||||
|
||||
namespace dfly::json {
|
||||
|
||||
Driver::Driver() : lexer_(std::make_unique<Lexer>()) {
|
||||
}
|
||||
|
||||
// Defined in this translation unit, where Lexer is a complete type (driver.h
// only forward-declares it), so that the std::unique_ptr<Lexer> member can be
// destroyed.
Driver::~Driver() = default;
|
||||
|
||||
void Driver::SetInput(std::string str) {
|
||||
cur_str_ = std::move(str);
|
||||
lexer_->in(cur_str_);
|
||||
}
|
||||
|
||||
} // namespace dfly::json
|
32
src/core/json/driver.h
Normal file
32
src/core/json/driver.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2024, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
namespace dfly {
|
||||
namespace json {
|
||||
|
||||
class Lexer;
|
||||
|
||||
class Driver {
|
||||
public:
|
||||
Driver();
|
||||
~Driver();
|
||||
|
||||
Lexer* lexer() {
|
||||
return lexer_.get();
|
||||
}
|
||||
|
||||
void SetInput(std::string str);
|
||||
|
||||
private:
|
||||
std::string cur_str_;
|
||||
std::unique_ptr<Lexer> lexer_;
|
||||
};
|
||||
|
||||
} // namespace json
|
||||
} // namespace dfly
|
78
src/core/json/jsonpath_grammar.y
Normal file
78
src/core/json/jsonpath_grammar.y
Normal file
|
@ -0,0 +1,78 @@
|
|||
%skeleton "lalr1.cc" // -*- C++ -*-
|
||||
%require "3.5" // fedora 32 has this one.
|
||||
|
||||
%defines // %header starts from 3.8.1
|
||||
|
||||
%define api.namespace {dfly::json}
|
||||
%define api.token.raw
|
||||
%define api.token.constructor
|
||||
%define api.value.type variant
|
||||
%define api.parser.class {Parser}
|
||||
%define parse.assert
|
||||
|
||||
// Added to header file before parser declaration.
|
||||
%code requires {
|
||||
namespace dfly {
|
||||
namespace json {
|
||||
class Driver;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Added to cc file
|
||||
%code {
|
||||
|
||||
#include "src/core/json/lexer_impl.h"
|
||||
#include "src/core/json/driver.h"
|
||||
|
||||
// Have to disable because GCC doesn't understand `symbol_type`'s union
|
||||
// implementation
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
|
||||
#define yylex driver->lexer()->Lex
|
||||
|
||||
using namespace std;
|
||||
}
|
||||
|
||||
%parse-param { Driver *driver }
|
||||
|
||||
%locations
|
||||
|
||||
%define parse.trace
|
||||
%define parse.error verbose // detailed
|
||||
%define parse.lac full
|
||||
%define api.token.prefix {TOK_}
|
||||
|
||||
%token
|
||||
LBRACKET "["
|
||||
ROOT "$"
|
||||
DOT "."
|
||||
|
||||
// Needed 0 at the end to satisfy bison 3.5.1
|
||||
%token YYEOF 0
|
||||
%token <std::string> UNQ_STR "unq_str"
|
||||
|
||||
%%
|
||||
// Based on the following specification:
|
||||
// https://danielaparker.github.io/JsonCons.Net/articles/JsonPath/Specification.html
|
||||
|
||||
// A path is the root selector "$", optionally followed by a chain of
// dot-separated member names.
jsonpath
  : ROOT
  | ROOT relative_location
  ;

// One ".name" step and whatever follows it.
relative_location
  : DOT relative_path
  ;

// A member name, optionally followed by further ".name" steps.
relative_path
  : identifier
  | identifier relative_location
  ;

// Only unquoted names are accepted for now.
identifier
  : unquoted_string
  // | single_quoted_string | double_quoted_string
  ;

unquoted_string
  : UNQ_STR
  ;
|
||||
|
||||
%%
|
||||
|
||||
|
||||
void dfly::json::Parser::error(const location_type& l, const string& m)
|
||||
{
|
||||
cerr << l << ": " << m << '\n';
|
||||
}
|
48
src/core/json/jsonpath_lexer.lex
Normal file
48
src/core/json/jsonpath_lexer.lex
Normal file
|
@ -0,0 +1,48 @@
|
|||
%top{
|
||||
// generated in the header file.
|
||||
#include "core/json/jsonpath_grammar.hh"
|
||||
}
|
||||
|
||||
|
||||
%{
|
||||
// SECOND SECTION
|
||||
%}
|
||||
|
||||
%o bison-cc-namespace="dfly.json" bison-cc-parser="Parser"
|
||||
%o namespace="dfly.json"
|
||||
|
||||
// Generated class and main function
|
||||
%o lexer="AbstractLexer" lex="Lex"
|
||||
|
||||
// our derived class from AbstractLexer
|
||||
%o class="Lexer"
|
||||
%o nodefault batch
|
||||
%option unicode
|
||||
|
||||
/* Declarations before lexer implementation. */
|
||||
%{
|
||||
#define DFLY_LEXER_CC 1
|
||||
#include "src/core/json/lexer_impl.h"
|
||||
#undef DFLY_LEXER_CC
|
||||
%}
|
||||
|
||||
|
||||
%{
|
||||
// Code run each time a pattern is matched.
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
%{
|
||||
// Code run each time lex() is called.
|
||||
%}
|
||||
|
||||
[[:space:]]+     ; // skip white space

"$"              return Parser::make_ROOT(loc());  // root selector
"."              return Parser::make_DOT(loc());   // member separator
\w[\w_\-]*       return Parser::make_UNQ_STR(str(), loc());  // unquoted member name, matched text is the token value
<<EOF>>          return Parser::make_YYEOF(loc());  // report end of input as the YYEOF token (declared in the grammar) instead of printing a debug line without returning
|
||||
%%
|
||||
|
||||
// Function definitions
|
51
src/core/json/jsonpath_test.cc
Normal file
51
src/core/json/jsonpath_test.cc
Normal file
|
@ -0,0 +1,51 @@
|
|||
// Copyright 2024, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#include "base/gtest.h"
|
||||
#include "base/logging.h"
|
||||
#include "core/json/driver.h"
|
||||
#include "core/json/lexer_impl.h"
|
||||
|
||||
namespace dfly::json {
|
||||
|
||||
using namespace std;
|
||||
|
||||
class JsonPathTest : public ::testing::Test {
|
||||
protected:
|
||||
JsonPathTest() {
|
||||
driver_.lexer()->set_debug(1);
|
||||
}
|
||||
|
||||
void SetInput(const std::string& str) {
|
||||
driver_.SetInput(str);
|
||||
}
|
||||
|
||||
Parser::symbol_type Lex() {
|
||||
return driver_.lexer()->Lex();
|
||||
}
|
||||
|
||||
Driver driver_;
|
||||
};
|
||||
|
||||
// Scans the next token and asserts that its kind is TOK_<tok_enum>.
#define NEXT_TOK(tok_enum)                                         \
  do {                                                             \
    auto next_sym = Lex();                                         \
    ASSERT_EQ(next_sym.type_get(), Parser::token::TOK_##tok_enum); \
  } while (0)

// Same as NEXT_TOK, and additionally expects the token's semantic value,
// read as `type`, to equal `val`.
#define NEXT_EQ(tok_enum, type, val)                               \
  do {                                                             \
    auto next_sym = Lex();                                         \
    ASSERT_EQ(next_sym.type_get(), Parser::token::TOK_##tok_enum); \
    EXPECT_EQ(val, next_sym.value.as<type>());                     \
  } while (0)
|
||||
|
||||
// Checks the first three tokens of a path whose member name contains non-ASCII
// letters, a dash and a digit. The rest of the input is not scanned here.
TEST_F(JsonPathTest, Scanner) {
  const string path = "$.мага-зин2.book[0].title";
  SetInput(path);

  NEXT_TOK(ROOT);
  NEXT_TOK(DOT);
  NEXT_EQ(UNQ_STR, string, "мага-зин2");
}
|
||||
|
||||
} // namespace dfly::json
|
15
src/core/json/lexer_impl.cc
Normal file
15
src/core/json/lexer_impl.cc
Normal file
|
@ -0,0 +1,15 @@
|
|||
// Copyright 2024, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#include "src/core/json/lexer_impl.h"
|
||||
|
||||
namespace dfly::json {
|
||||
|
||||
// Nothing to set up beyond default-constructing the AbstractLexer base.
Lexer::Lexer() = default;
|
||||
|
||||
// No resources of its own to release.
Lexer::~Lexer() = default;
|
||||
|
||||
} // namespace dfly::json
|
32
src/core/json/lexer_impl.h
Normal file
32
src/core/json/lexer_impl.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2024, DragonflyDB authors. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
// We should not include lexer.h when compiling from lexer.cc file because it already
|
||||
// includes lexer.h
|
||||
#ifndef DFLY_LEXER_CC
|
||||
#include "src/core/json/jsonpath_lexer.h"
|
||||
#endif
|
||||
|
||||
#include "src/core/json/jsonpath_grammar.hh"
|
||||
|
||||
namespace dfly {
|
||||
namespace json {
|
||||
|
||||
class Lexer : public AbstractLexer {
|
||||
public:
|
||||
Lexer();
|
||||
~Lexer();
|
||||
|
||||
Parser::symbol_type Lex() final;
|
||||
|
||||
private:
|
||||
dfly::json::location loc() {
|
||||
return location();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace json
|
||||
} // namespace dfly
|
Loading…
Reference in a new issue