From c34270c02ddfc5090f47e13025bc06b407cfc9f2 Mon Sep 17 00:00:00 2001 From: Boaz Sade Date: Thu, 8 Dec 2022 15:32:58 +0200 Subject: [PATCH] feat(server): json set type support (#546) Signed-off-by: Boaz Sade --- src/core/CMakeLists.txt | 4 +- src/core/compact_object.cc | 45 ++++++++++++++-- src/core/compact_object.h | 23 +++++--- src/core/compact_object_test.cc | 94 +++++++++++++++++++++++++++++++++ src/core/json_object.cc | 37 +++++++++++++ src/core/json_object.h | 36 +++++++++++++ src/server/common.cc | 4 +- 7 files changed, 231 insertions(+), 12 deletions(-) create mode 100644 src/core/json_object.cc create mode 100644 src/core/json_object.h diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 3daa484f8..55bb13c6a 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,8 +1,8 @@ add_library(dfly_core compact_object.cc dragonfly_core.cc extent_tree.cc - external_alloc.cc interpreter.cc mi_memory_resource.cc + external_alloc.cc interpreter.cc json_object.cc mi_memory_resource.cc segment_allocator.cc small_string.cc tx_queue.cc dense_set.cc string_set.cc) cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua lua_modules - Boost::fiber crypto) + Boost::fiber TRDP::jsoncons crypto) add_executable(dash_bench dash_bench.cc) cxx_link(dash_bench dfly_core) diff --git a/src/core/compact_object.cc b/src/core/compact_object.cc index e97baeed3..afceeb500 100644 --- a/src/core/compact_object.cc +++ b/src/core/compact_object.cc @@ -16,9 +16,10 @@ extern "C" { #include "redis/zmalloc.h" // for non-string objects. #include "redis/zset.h" } - #include +#include + #include "base/flags.h" #include "base/logging.h" #include "base/pod_array.h" @@ -577,8 +578,9 @@ size_t CompactObj::Size() const { } uint64_t CompactObj::HashCode() const { - uint8_t encoded = (mask_ & kEncMask); + DCHECK(taglen_ != JSON_TAG) << "JSON type cannot be used for keys!"; + uint8_t encoded = (mask_ & kEncMask); if (IsInline()) { if (encoded) { char buf[kInlineLen * 2]; @@ -621,6 +623,10 @@ unsigned CompactObj::ObjType() const { if (taglen_ == ROBJ_TAG) return u_.r_obj.type(); + if (taglen_ == JSON_TAG) { + return OBJ_JSON; + } + LOG(FATAL) << "TBD " << int(taglen_); return 0; } @@ -726,6 +732,24 @@ std::optional CompactObj::TryGetInt() const { return val; } +auto CompactObj::GetJson() const -> JsonType* { + if (ObjType() == OBJ_JSON) { + return u_.json_obj.json_ptr; + } + return nullptr; +} + +void CompactObj::SetJson(JsonType&& j) { + if (taglen_ == JSON_TAG) { // already json + DCHECK(u_.json_obj.json_ptr != nullptr); // must be allocated + *u_.json_obj.json_ptr = std::move(j); + } else { + SetMeta(JSON_TAG); + void* ptr = tl.local_mr->allocate(sizeof(JsonType), kAlignSize); + u_.json_obj.json_ptr = new (ptr) JsonType(std::move(j)); + } +} + void CompactObj::SetString(std::string_view str) { uint8_t mask = mask_ & ~kEncMask; @@ -894,7 +918,7 @@ bool CompactObj::HasAllocated() const { (taglen_ == ROBJ_TAG && u_.r_obj.inner_obj() == nullptr)) return false; - DCHECK(taglen_ == ROBJ_TAG || taglen_ == SMALL_TAG); + DCHECK(taglen_ == ROBJ_TAG || taglen_ == SMALL_TAG || taglen_ == JSON_TAG); return true; } @@ -1004,6 +1028,10 @@ void CompactObj::Free() { } else if (taglen_ == SMALL_TAG) { tl.small_str_bytes -= u_.small_str.MallocUsed(); u_.small_str.Free(); + } else if (taglen_ == JSON_TAG) { + VLOG(1) << "Freeing JSON object"; + u_.json_obj.json_ptr->~JsonType(); + tl.local_mr->deallocate(u_.json_obj.json_ptr, kAlignSize); } else { LOG(FATAL) << "Unsupported tag " << int(taglen_); } @@ -1019,6 +1047,11 @@ size_t CompactObj::MallocUsed() const { return u_.r_obj.MallocUsed(); } + if (taglen_ == JSON_TAG) { + DCHECK(u_.json_obj.json_ptr != nullptr); + return zmalloc_size(u_.json_obj.json_ptr); + } + if (taglen_ == SMALL_TAG) { return u_.small_str.MallocUsed(); } @@ -1028,6 +1061,8 @@ size_t CompactObj::MallocUsed() const { } bool CompactObj::operator==(const CompactObj& o) const { + DCHECK(taglen_ != JSON_TAG && o.taglen_ != JSON_TAG) << "cannot use JSON type to check equal"; + uint8_t m1 = mask_ & kEncMask; uint8_t m2 = mask_ & kEncMask; if (m1 != m2) @@ -1096,6 +1131,10 @@ bool CompactObj::CmpEncoded(string_view sv) const { return detail::compare_packed(to_byte(u_.r_obj.inner_obj()), sv.data(), sv.size()); } + if (taglen_ == JSON_TAG) { + return false; // cannot compare json with string + } + if (taglen_ == SMALL_TAG) { if (u_.small_str.size() != encode_len) return false; diff --git a/src/core/compact_object.h b/src/core/compact_object.h index 087df9ee7..3853554fb 100644 --- a/src/core/compact_object.h +++ b/src/core/compact_object.h @@ -9,6 +9,7 @@ #include #include +#include "core/json_object.h" #include "core/small_string.h" typedef struct redisObject robj; @@ -94,12 +95,7 @@ class CompactObj { CompactObj(const CompactObj&) = delete; // 0-16 is reserved for inline lengths of string type. - enum TagEnum { - INT_TAG = 17, - SMALL_TAG = 18, - ROBJ_TAG = 19, - EXTERNAL_TAG = 20, - }; + enum TagEnum { INT_TAG = 17, SMALL_TAG = 18, ROBJ_TAG = 19, EXTERNAL_TAG = 20, JSON_TAG = 21 }; enum MaskBit { REF_BIT = 1, @@ -265,6 +261,15 @@ class CompactObj { void SetString(std::string_view str); void GetString(std::string* res) const; + // Will set this to hold OBJ_JSON, after that it is safe to call GetJson + // NOTE: in order to avid copy which can be expensive in this case, + // you need to move an object that created with the function JsonFromString + // into here, no copying is allowed! + void SetJson(JsonType&& j); + + // pre condition - the type here is OBJ_JSON and was set with SetJson + JsonType* GetJson() const; + // dest must have at least Size() bytes available void GetString(char* dest) const; @@ -326,6 +331,11 @@ class CompactObj { uint32_t unneeded; } __attribute__((packed)); + struct JsonWrapper { + JsonType* json_ptr = nullptr; + size_t unneeded = 0; + } __attribute__((packed)); + // My main data structure. Union of representations. // RobjWrapper is kInlineLen=16 bytes, so we employ SSO of that size via inline_str. // In case of int values, we waste 8 bytes. I am assuming it's ok and it's not the data type @@ -335,6 +345,7 @@ class CompactObj { SmallString small_str; detail::RobjWrapper r_obj; + JsonWrapper json_obj; int64_t ival __attribute__((packed)); ExternalPtr ext_ptr; diff --git a/src/core/compact_object_test.cc b/src/core/compact_object_test.cc index 34d4c73cc..117a2de0f 100644 --- a/src/core/compact_object_test.cc +++ b/src/core/compact_object_test.cc @@ -7,9 +7,13 @@ #include #include +#include +#include + #include "base/gtest.h" #include "base/logging.h" #include "core/flat_set.h" +#include "core/json_object.h" #include "core/mi_memory_resource.h" extern "C" { @@ -28,6 +32,8 @@ constexpr size_t kRandomStartIndex = 24; constexpr size_t kRandomStep = 26; constexpr float kUnderUtilizedRatio = 1.0f; // ensure that we would detect using namespace std; +using namespace jsoncons; +using namespace jsoncons::jsonpath; void PrintTo(const CompactObj& cobj, std::ostream* os) { if (cobj.ObjType() == OBJ_STRING) { @@ -359,4 +365,92 @@ TEST_F(CompactObjectTest, MimallocUnderutilzationWithRealloc) { } } +TEST_F(CompactObjectTest, JsonTypeTest) { + using namespace jsoncons; + // This test verify that we can set a json type + // and that we "know", it JSON and not a string + std::string_view json_str = R"( + {"firstName":"John","lastName":"Smith","age":27,"weight":135.25,"isAlive":true, + "address":{"street":"21 2nd Street","city":"New York","state":"NY","zipcode":"10021-3100"}, + "phoneNumbers":[{"type":"home","number":"212 555-1234"},{"type":"office","number":"646 555-4567"}], + "children":[],"spouse":null} + )"; + std::optional json_option2 = + JsonFromString(R"({"a":{}, "b":{"a":1}, "c":{"a":1, "b":2}})"); + + cobj_.SetString(json_str); + ASSERT_TRUE(cobj_.ObjType() == OBJ_STRING); // we set this as a string + JsonType* failed_json = cobj_.GetJson(); + ASSERT_TRUE(failed_json == nullptr); + ASSERT_TRUE(cobj_.ObjType() == OBJ_STRING); + std::optional json_option = JsonFromString(json_str); + ASSERT_TRUE(json_option.has_value()); + cobj_.SetJson(std::move(json_option.value())); + ASSERT_TRUE(cobj_.ObjType() == OBJ_JSON); // and now this is a JSON type + JsonType* json = cobj_.GetJson(); + ASSERT_TRUE(json != nullptr); + ASSERT_TRUE(json->contains("firstName")); + // set second object make sure that we don't have any memory issue + ASSERT_TRUE(json_option2.has_value()); + cobj_.SetJson(std::move(json_option2.value())); + ASSERT_TRUE(cobj_.ObjType() == OBJ_JSON); // still is a JSON type + json = cobj_.GetJson(); + ASSERT_TRUE(json != nullptr); + ASSERT_TRUE(json->contains("b")); + ASSERT_FALSE(json->contains("firstName")); + std::optional set_array = JsonFromString(""); + // now set it to string again + cobj_.SetString(R"({"a":{}, "b":{"a":1}, "c":{"a":1, "b":2}})"); + ASSERT_TRUE(cobj_.ObjType() == OBJ_STRING); // we set this as a string + failed_json = cobj_.GetJson(); + ASSERT_TRUE(failed_json == nullptr); +} + +TEST_F(CompactObjectTest, JsonTypeWithPathTest) { + std::string_view books_json = + R"({"books":[{ + "category": "fiction", + "title" : "A Wild Sheep Chase", + "author" : "Haruki Murakami" + },{ + "category": "fiction", + "title" : "The Night Watch", + "author" : "Sergei Lukyanenko" + },{ + "category": "fiction", + "title" : "The Comedians", + "author" : "Graham Greene" + },{ + "category": "memoir", + "title" : "The Night Watch", + "author" : "Phillips, David Atlee" + }]})"; + std::optional json_array = JsonFromString(books_json); + ASSERT_TRUE(json_array.has_value()); + cobj_.SetJson(std::move(json_array.value())); + ASSERT_TRUE(cobj_.ObjType() == OBJ_JSON); // and now this is a JSON type + auto f = [](const std::string& /*path*/, JsonType& book) { + if (book.at("category") == "memoir" && !book.contains("price")) { + book.try_emplace("price", 140.0); + } + }; + JsonType* json = cobj_.GetJson(); + ASSERT_TRUE(json != nullptr); + jsonpath::json_replace(*json, "$.books[*]", f); + + // Check whether we've changed the entry for json in place + // we should have prices only for memoir books + JsonType* json2 = cobj_.GetJson(); + ASSERT_TRUE(json != nullptr); + ASSERT_TRUE(json->contains("books")); + for (auto&& book : (*json2)["books"].array_range()) { + // make sure that we add prices only to "memoir" + if (book.at("category") == "memoir") { + ASSERT_TRUE(book.contains("price")); + } else { + ASSERT_FALSE(book.contains("price")); + } + } +} + } // namespace dfly diff --git a/src/core/json_object.cc b/src/core/json_object.cc new file mode 100644 index 000000000..e7c158d3d --- /dev/null +++ b/src/core/json_object.cc @@ -0,0 +1,37 @@ +// Copyright 2022, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#include "core/json_object.h" +extern "C" { +#include "redis/object.h" +#include "redis/zmalloc.h" +} +#include + +#include "base/logging.h" + +namespace dfly { + +std::optional JsonFromString(std::string_view input) { + using namespace jsoncons; + + std::error_code ec; + auto JsonErrorHandler = [](json_errc ec, const ser_context&) { + VLOG(1) << "Error while decode JSON: " << make_error_code(ec).message(); + return false; + }; + + json_decoder decoder; + json_parser parser(basic_json_decode_options{}, JsonErrorHandler); + + parser.update(input); + parser.finish_parse(decoder, ec); + + if (decoder.is_valid()) { + return decoder.get_result(); + } + return {}; +} + +} // namespace dfly diff --git a/src/core/json_object.h b/src/core/json_object.h new file mode 100644 index 000000000..6e1a19593 --- /dev/null +++ b/src/core/json_object.h @@ -0,0 +1,36 @@ +// Copyright 2022, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#pragma once + +#include +#include +#include +#include + +// Note about this file - once we have the issue with jsonpath in jsoncons resolved +// we would add the implementation for the allocator here as well. Right now this +// file is a little bit empty, but for external "users" such as json_family they +// should include this when creating JSON object from string that we're getting +// from the commands. +namespace jsoncons { +struct sorted_policy; +template class basic_json; +} // namespace jsoncons + +// the last one in object.h is OBJ_STREAM and it is 6, +// this will add enough place for Redis types to grow +constexpr unsigned OBJ_JSON = 15; + +namespace dfly { + +// This is temporary, there is an issue right now with jsoncons about using jsonpath +// with custom allocator. once it would resolved, we would change this to use custom allocator +// that allocate memory from mimalloc +using JsonType = jsoncons::basic_json>; + +// Build a json object from string. If the string is not legal json, will return nullopt +std::optional JsonFromString(std::string_view input); + +} // namespace dfly diff --git a/src/server/common.cc b/src/server/common.cc index 62046a5c4..4fcbdf15d 100644 --- a/src/server/common.cc +++ b/src/server/common.cc @@ -15,8 +15,8 @@ extern "C" { #include "redis/util.h" #include "redis/zmalloc.h" } - #include "base/logging.h" +#include "core/compact_object.h" #include "server/error.h" #include "server/server_state.h" @@ -87,6 +87,8 @@ const char* ObjTypeName(int type) { return "hash"; case OBJ_STREAM: return "stream"; + case OBJ_JSON: + return "ReJSON-RL"; default: LOG(ERROR) << "Unsupported type " << type; }