1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-14 11:58:02 +00:00

feat: introduce a skeleton for jsonpath parser (#2521)

Not finished yet, mostly a boilerplate plus minimal bindings for the test.
Motivation: as long as we rely on jsoncons jsonpath implementation we won't be able
to change json implementation as well. jsoncons path is quite good but it will only
work with jsoncons json library. By implementing this ourselves we will gain the freedom
to change json implementation in the future.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2024-02-06 19:21:40 +02:00 committed by GitHub
parent ed59a439d1
commit 4b4ff4688b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 290 additions and 0 deletions

View file

@ -1,4 +1,6 @@
add_subdirectory(search)
add_subdirectory(json)
set(SEARCH_LIB query_parser)
add_library(dfly_core compact_object.cc dragonfly_core.cc extent_tree.cc

View file

@ -0,0 +1,10 @@
# Build rules for the jsonpath library and its unit test.
# gen_flex / gen_bison / cur_gen_dir are project helpers defined elsewhere;
# presumably they generate the scanner/parser sources and report the output
# directory in gen_dir -- confirm against their definitions.
gen_flex(jsonpath_lexer)
gen_bison(jsonpath_grammar)
cur_gen_dir(gen_dir)
# The library combines the hand-written glue (lexer_impl.cc, driver.cc) with the
# jsonpath_lexer.cc / jsonpath_grammar.cc sources taken from ${gen_dir}.
add_library(jsonpath lexer_impl.cc driver.cc
${gen_dir}/jsonpath_lexer.cc ${gen_dir}/jsonpath_grammar.cc)
target_link_libraries(jsonpath base absl::strings TRDP::reflex)
# jsonpath_test links against the jsonpath library and is labelled DFLY.
cxx_test(jsonpath_test jsonpath LABELS DFLY)

22
src/core/json/driver.cc Normal file
View file

@ -0,0 +1,22 @@
// Copyright 2024, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "src/core/json/driver.h"
#include "src/core/json/lexer_impl.h"
namespace dfly::json {
Driver::Driver() : lexer_(std::make_unique<Lexer>()) {
}
Driver::~Driver() {
}
void Driver::SetInput(std::string str) {
cur_str_ = std::move(str);
lexer_->in(cur_str_);
}
} // namespace dfly::json

32
src/core/json/driver.h Normal file
View file

@ -0,0 +1,32 @@
// Copyright 2024, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
#include <memory>
#include <string>
namespace dfly::json {

class Lexer;

// Owns the Lexer and the string that is handed to it as input.
class Driver {
 public:
  Driver();
  ~Driver();

  // Non-owning access to the lexer; the Driver keeps ownership.
  Lexer* lexer() { return lexer_.get(); }

  // Replaces the input text; takes the string by value and keeps it in cur_str_.
  void SetInput(std::string str);

 private:
  std::string cur_str_;
  std::unique_ptr<Lexer> lexer_;
};

}  // namespace dfly::json

View file

@ -0,0 +1,78 @@
%skeleton "lalr1.cc" // -*- C++ -*-
// Bison grammar for jsonpath expressions. It generates the C++ class
// dfly::json::Parser; the rules below carry no semantic actions yet.
%require "3.5" // fedora 32 has this one.
%defines // %header starts from 3.8.1
%define api.namespace {dfly::json}
%define api.token.raw
// Tokens are created through the generated Parser::make_<NAME>() functions
// (used by the lexer) and semantic values are held in a variant.
%define api.token.constructor
%define api.value.type variant
%define api.parser.class {Parser}
%define parse.assert
// Added to header file before parser declaration.
%code requires {
namespace dfly {
namespace json {
class Driver;
}
}
}
// Added to cc file
%code {
#include "src/core/json/lexer_impl.h"
#include "src/core/json/driver.h"
// Have to disable because GCC doesn't understand `symbol_type`'s union
// implementation
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
// Route the parser's yylex calls to the Lexer owned by the driver.
#define yylex driver->lexer()->Lex
using namespace std;
}
// The parser receives the Driver as an extra parameter named `driver`.
%parse-param { Driver *driver }
%locations
%define parse.trace
%define parse.error verbose // detailed
%define parse.lac full
// Token enumerators get the TOK_ prefix, e.g. Parser::token::TOK_ROOT.
%define api.token.prefix {TOK_}
// Punctuation tokens. LBRACKET is declared but not referenced by any rule below.
%token
LBRACKET "["
ROOT "$"
DOT "."
// Needed 0 at the end to satisfy bison 3.5.1
%token YYEOF 0
%token <std::string> UNQ_STR "unq_str"
%%
// Based on the following specification:
// https://danielaparker.github.io/JsonCons.Net/articles/JsonPath/Specification.html
// A path is the root "$", optionally followed by one or more ".identifier" steps.
jsonpath: ROOT
| ROOT relative_location
relative_location: DOT relative_path
relative_path: identifier
| identifier relative_location
// Only unquoted identifiers are accepted so far.
identifier: unquoted_string
// | single_quoted_string | double_quoted_string
unquoted_string : UNQ_STR
%%
void dfly::json::Parser::error(const location_type& l, const string& m)
{
cerr << l << ": " << m << '\n';
}

View file

@ -0,0 +1,48 @@
%top{
// generated in the header file.
#include "core/json/jsonpath_grammar.hh"
}
%{
// SECOND SECTION
%}
// Names of the bison C++ parser (namespace dfly::json, class Parser) that this
// scanner produces tokens for.
%o bison-cc-namespace="dfly.json" bison-cc-parser="Parser"
%o namespace="dfly.json"
// Generated class and main function
%o lexer="AbstractLexer" lex="Lex"
// our derived class from AbstractLexer
%o class="Lexer"
// NOTE(review): presumably `nodefault` disables the default rule for unmatched
// input and `batch` selects batch-mode input -- confirm against the RE/flex docs.
%o nodefault batch
// Unicode matching; the scanner test feeds Cyrillic identifiers.
%option unicode
/* Declarations before lexer implementation. */
%{
// DFLY_LEXER_CC makes lexer_impl.h skip its own include of the generated lexer
// header (see the #ifndef guard in lexer_impl.h).
#define DFLY_LEXER_CC 1
#include "src/core/json/lexer_impl.h"
#undef DFLY_LEXER_CC
%}
%{
// Code run each time a pattern is matched.
%}
%%
%{
// Code run each time lex() is called.
%}
[[:space:]]+ ; // skip white space
"$" return Parser::make_ROOT(loc());
"." return Parser::make_DOT(loc());
\w[\w_\-]* return Parser::make_UNQ_STR(str(), loc()); // unquoted identifier, value is the matched text
<<EOF>> return Parser::make_YYEOF(loc()); // hand the end-of-input token to the parser instead of only printing
%%
// Function definitions

View file

@ -0,0 +1,51 @@
// Copyright 2024, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "base/gtest.h"
#include "base/logging.h"
#include "core/json/driver.h"
#include "core/json/lexer_impl.h"
namespace dfly::json {
using namespace std;
// Test fixture that owns a Driver and exposes small helpers for feeding text to
// its lexer and pulling tokens out of it.
class JsonPathTest : public ::testing::Test {
 protected:
  JsonPathTest() {
    // Set the lexer's debug level to 1 for every test using this fixture.
    Lexer* lexer = driver_.lexer();
    lexer->set_debug(1);
  }

  // Replaces the text the driver hands to the lexer.
  void SetInput(const std::string& str) {
    driver_.SetInput(str);
  }

  // Returns the next symbol produced by the driver's lexer.
  Parser::symbol_type Lex() {
    Lexer* lexer = driver_.lexer();
    return lexer->Lex();
  }

  Driver driver_;
};
// Pulls the next symbol via Lex() and asserts that its type is
// Parser::token::TOK_<tok_enum>.
#define NEXT_TOK(tok_enum)                                        \
  {                                                               \
    auto sym = Lex();                                             \
    ASSERT_EQ(sym.type_get(), Parser::token::TOK_##tok_enum);     \
  }
// Pulls the next symbol via Lex(), asserts that its type is
// Parser::token::TOK_<tok_enum>, then expects its semantic value, read as
// `type`, to equal `val`.
#define NEXT_EQ(tok_enum, type, val)                              \
  {                                                               \
    auto sym = Lex();                                             \
    ASSERT_EQ(sym.type_get(), Parser::token::TOK_##tok_enum);     \
    EXPECT_EQ(val, sym.value.as<type>());                         \
  }
// Checks the first three tokens the scanner produces for a path whose first
// identifier mixes non-ASCII letters, a dash and a digit.
TEST_F(JsonPathTest, Scanner) {
SetInput("$.мага-зин2.book[0].title");
NEXT_TOK(ROOT);
NEXT_TOK(DOT);
NEXT_EQ(UNQ_STR, string, "мага-зин2");
// The rest of the input (".book[0].title") is not consumed by this test.
}
} // namespace dfly::json

View file

@ -0,0 +1,15 @@
// Copyright 2024, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#include "src/core/json/lexer_impl.h"
namespace dfly::json {

// Both special members are declared in lexer_impl.h and defined here, out of line.
Lexer::Lexer() = default;

Lexer::~Lexer() = default;

}  // namespace dfly::json

View file

@ -0,0 +1,32 @@
// Copyright 2024, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//
#pragma once
// We should not include lexer.h when compiling from lexer.cc file because it already
// includes lexer.h
#ifndef DFLY_LEXER_CC
#include "src/core/json/jsonpath_lexer.h"
#endif
#include "src/core/json/jsonpath_grammar.hh"
namespace dfly::json {

// Scanner class used by the jsonpath parser; derives from the generated
// AbstractLexer.
class Lexer : public AbstractLexer {
 public:
  Lexer();
  ~Lexer();

  // Returns the next token as a parser symbol.
  // NOTE(review): no definition appears in lexer_impl.cc; presumably it is
  // emitted into the generated lexer source -- confirm.
  Parser::symbol_type Lex() final;

 private:
  // Short alias for location(), used by the lexer actions when building tokens.
  dfly::json::location loc() { return location(); }
};

}  // namespace dfly::json