mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2024-12-14 11:58:02 +00:00
feat: introduce simd algorithm for bitpacking (#568)
My benchmark shows a x3.5 improvement when compressing a 1KB string. Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
parent
adc89c7592
commit
bcafd7e25d
8 changed files with 331 additions and 142 deletions
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
|
@ -77,6 +77,6 @@ jobs:
|
|||
ccache --show-stats
|
||||
echo Run ctest -V -L DFLY
|
||||
#GLOG_logtostderr=1 GLOG_vmodule=transaction=1,engine_shard_set=1
|
||||
GLOG_logtostderr=1 GLOG_vmodule=rdb_load=1,rdb_save=2,snapshot=2 ctest -V -L DFLY
|
||||
GLOG_logtostderr=1 GLOG_vmodule=rdb_load=1,rdb_save=1,snapshot=1 ctest -V -L DFLY
|
||||
./dragonfly_test --mem_defrag_threshold=0.05 # trying to catch issue with defrag
|
||||
# GLOG_logtostderr=1 GLOG_vmodule=transaction=1,engine_shard_set=1 CTEST_OUTPUT_ON_FAILURE=1 ninja server/test
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
add_library(dfly_core compact_object.cc dragonfly_core.cc extent_tree.cc
|
||||
external_alloc.cc interpreter.cc json_object.cc mi_memory_resource.cc
|
||||
segment_allocator.cc small_string.cc tx_queue.cc dense_set.cc string_set.cc)
|
||||
segment_allocator.cc small_string.cc tx_queue.cc dense_set.cc string_set.cc
|
||||
detail/bitpacking.cc)
|
||||
|
||||
cxx_link(dfly_core base absl::flat_hash_map absl::str_format redis_lib TRDP::lua lua_modules
|
||||
Boost::fiber TRDP::jsoncons crypto)
|
||||
|
||||
|
|
|
@ -23,19 +23,15 @@ extern "C" {
|
|||
#include "base/flags.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/pod_array.h"
|
||||
#include "core/detail/bitpacking.h"
|
||||
#include "core/string_set.h"
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#include "base/sse2neon.h"
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
ABSL_FLAG(bool, use_set2, true, "If true use DenseSet for an optimized set data structure");
|
||||
|
||||
namespace dfly {
|
||||
using namespace std;
|
||||
using absl::GetFlag;
|
||||
using detail::binpacked_len;
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -154,35 +150,6 @@ inline void FreeObjStream(void* ptr) {
|
|||
freeStream((stream*)ptr);
|
||||
}
|
||||
|
||||
// Daniel Lemire's function validate_ascii_fast() - under Apache/MIT license.
|
||||
// See https://github.com/lemire/fastvalidate-utf-8/
|
||||
// The function returns true (1) if all chars passed in src are
|
||||
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
|
||||
bool validate_ascii_fast(const char* src, size_t len) {
|
||||
size_t i = 0;
|
||||
__m128i has_error = _mm_setzero_si128();
|
||||
if (len >= 16) {
|
||||
for (; i <= len - 16; i += 16) {
|
||||
__m128i current_bytes = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
has_error = _mm_or_si128(has_error, current_bytes);
|
||||
}
|
||||
}
|
||||
int error_mask = _mm_movemask_epi8(has_error);
|
||||
|
||||
char tail_has_error = 0;
|
||||
for (; i < len; i++) {
|
||||
tail_has_error |= src[i];
|
||||
}
|
||||
error_mask |= (tail_has_error & 0x80);
|
||||
|
||||
return !error_mask;
|
||||
}
|
||||
|
||||
// maps ascii len to 7-bit packed length. Each 8 bytes are converted to 7 bytes.
|
||||
inline constexpr size_t binpacked_len(size_t ascii_len) {
|
||||
return (ascii_len * 7 + 7) / 8; /* rounded up */
|
||||
}
|
||||
|
||||
// converts 7-bit packed length back to ascii length. Note that this conversion
|
||||
// is not accurate since it maps 7 bytes to 8 bytes (rounds up), while we may have
|
||||
// 7 byte strings converted to 7 byte as well.
|
||||
|
@ -428,91 +395,6 @@ void RobjWrapper::MakeInnerRoom(size_t current_cap, size_t desired, pmr::memory_
|
|||
inner_obj_ = newp;
|
||||
}
|
||||
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC optimize("Ofast")
|
||||
#endif
|
||||
|
||||
// len must be at least 16
|
||||
void ascii_pack(const char* ascii, size_t len, uint8_t* bin) {
|
||||
const char* end = ascii + len;
|
||||
|
||||
unsigned i = 0;
|
||||
while (ascii + 8 <= end) {
|
||||
for (i = 0; i < 7; ++i) {
|
||||
*bin++ = (ascii[0] >> i) | (ascii[1] << (7 - i));
|
||||
++ascii;
|
||||
}
|
||||
++ascii;
|
||||
}
|
||||
|
||||
// epilog - we do not pack since we have less than 8 bytes.
|
||||
while (ascii < end) {
|
||||
*bin++ = *ascii++;
|
||||
}
|
||||
}
|
||||
|
||||
// unpacks 8->7 encoded blob back to ascii.
|
||||
// generally, we can not unpack inplace because ascii (dest) buffer is 8/7 bigger than
|
||||
// the source buffer.
|
||||
// however, if binary data is positioned on the right of the ascii buffer with empty space on the
|
||||
// left than we can unpack inplace.
|
||||
void ascii_unpack(const uint8_t* bin, size_t ascii_len, char* ascii) {
|
||||
constexpr uint8_t kM = 0x7F;
|
||||
uint8_t p = 0;
|
||||
unsigned i = 0;
|
||||
|
||||
while (ascii_len >= 8) {
|
||||
for (i = 0; i < 7; ++i) {
|
||||
uint8_t src = *bin; // keep on stack in case we unpack inplace.
|
||||
*ascii++ = (p >> (8 - i)) | ((src << i) & kM);
|
||||
p = src;
|
||||
++bin;
|
||||
}
|
||||
|
||||
ascii_len -= 8;
|
||||
*ascii++ = p >> 1;
|
||||
}
|
||||
|
||||
DCHECK_LT(ascii_len, 8u);
|
||||
for (i = 0; i < ascii_len; ++i) {
|
||||
*ascii++ = *bin++;
|
||||
}
|
||||
}
|
||||
|
||||
// compares packed and unpacked strings. packed must be of length = binpacked_len(ascii_len).
|
||||
bool compare_packed(const uint8_t* packed, const char* ascii, size_t ascii_len) {
|
||||
unsigned i = 0;
|
||||
bool res = true;
|
||||
const char* end = ascii + ascii_len;
|
||||
|
||||
while (ascii + 8 <= end) {
|
||||
for (i = 0; i < 7; ++i) {
|
||||
uint8_t conv = (ascii[0] >> i) | (ascii[1] << (7 - i));
|
||||
res &= (conv == *packed);
|
||||
++ascii;
|
||||
++packed;
|
||||
}
|
||||
|
||||
if (!res)
|
||||
return false;
|
||||
|
||||
++ascii;
|
||||
}
|
||||
|
||||
while (ascii < end) {
|
||||
if (*ascii++ != *packed++) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
} // namespace detail
|
||||
|
||||
using namespace std;
|
||||
|
@ -777,7 +659,7 @@ void CompactObj::SetString(std::string_view str) {
|
|||
DCHECK_GT(str.size(), kInlineLen);
|
||||
|
||||
string_view encoded = str;
|
||||
bool is_ascii = kUseAsciiEncoding && validate_ascii_fast(str.data(), str.size());
|
||||
bool is_ascii = kUseAsciiEncoding && detail::validate_ascii_fast(str.data(), str.size());
|
||||
|
||||
if (is_ascii) {
|
||||
size_t encode_len = binpacked_len(str.size());
|
||||
|
@ -792,7 +674,7 @@ void CompactObj::SetString(std::string_view str) {
|
|||
}
|
||||
|
||||
tl.tmp_buf.resize(encode_len);
|
||||
detail::ascii_pack(str.data(), str.size(), tl.tmp_buf.data());
|
||||
detail::ascii_pack_simd(str.data(), str.size(), tl.tmp_buf.data());
|
||||
encoded = string_view{reinterpret_cast<char*>(tl.tmp_buf.data()), encode_len};
|
||||
|
||||
if (encoded.size() <= kInlineLen) {
|
||||
|
@ -1125,7 +1007,7 @@ bool CompactObj::CmpEncoded(string_view sv) const {
|
|||
if (u_.r_obj.Size() != encode_len)
|
||||
return false;
|
||||
|
||||
if (!validate_ascii_fast(sv.data(), sv.size()))
|
||||
if (!detail::validate_ascii_fast(sv.data(), sv.size()))
|
||||
return false;
|
||||
|
||||
return detail::compare_packed(to_byte(u_.r_obj.inner_obj()), sv.data(), sv.size());
|
||||
|
@ -1139,7 +1021,7 @@ bool CompactObj::CmpEncoded(string_view sv) const {
|
|||
if (u_.small_str.size() != encode_len)
|
||||
return false;
|
||||
|
||||
if (!validate_ascii_fast(sv.data(), sv.size()))
|
||||
if (!detail::validate_ascii_fast(sv.data(), sv.size()))
|
||||
return false;
|
||||
|
||||
// We need to compare an unpacked sv with 2 packed parts.
|
||||
|
|
|
@ -76,16 +76,6 @@ class RobjWrapper {
|
|||
|
||||
} __attribute__((packed));
|
||||
|
||||
// unpacks 8->7 encoded blob back to ascii.
|
||||
// generally, we can not unpack inplace because ascii (dest) buffer is 8/7 bigger than
|
||||
// the source buffer.
|
||||
// however, if binary data is positioned on the right of the ascii buffer with empty space on the
|
||||
// left than we can unpack inplace.
|
||||
void ascii_unpack(const uint8_t* bin, size_t ascii_len, char* ascii);
|
||||
|
||||
// packs ascii string (does not verify) into binary form saving 1 bit per byte on average (12.5%).
|
||||
void ascii_pack(const char* ascii, size_t len, uint8_t* bin);
|
||||
|
||||
} // namespace detail
|
||||
|
||||
class CompactObj {
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include "base/gtest.h"
|
||||
#include "base/logging.h"
|
||||
#include "core/detail/bitpacking.h"
|
||||
#include "core/flat_set.h"
|
||||
#include "core/json_object.h"
|
||||
#include "core/mi_memory_resource.h"
|
||||
|
@ -189,13 +190,24 @@ TEST_F(CompactObjectTest, AsciiUtil) {
|
|||
std::string_view data{"aaaaaabb"};
|
||||
uint8_t buf[32];
|
||||
|
||||
char ascii2[] = "xxxxxxxxxxxxxx";
|
||||
detail::ascii_pack(data.data(), 7, buf);
|
||||
detail::ascii_unpack(buf, 7, ascii2);
|
||||
char outbuf[32] = "xxxxxxxxxxxxxx";
|
||||
detail::ascii_pack_simd(data.data(), 7, buf);
|
||||
detail::ascii_unpack(buf, 7, outbuf);
|
||||
|
||||
ASSERT_EQ('x', ascii2[7]) << ascii2;
|
||||
std::string_view actual{ascii2, 7};
|
||||
ASSERT_EQ('x', outbuf[7]) << outbuf;
|
||||
std::string_view actual{outbuf, 7};
|
||||
ASSERT_EQ(data.substr(0, 7), actual);
|
||||
|
||||
string data3;
|
||||
for (unsigned i = 0; i < 97; ++i) {
|
||||
data3.append("12345678910");
|
||||
}
|
||||
string act_str(data3.size(), 'y');
|
||||
std::vector<uint8_t> binvec(detail::binpacked_len(data3.size()));
|
||||
detail::ascii_pack_simd(data3.data(), data3.size(), binvec.data());
|
||||
detail::ascii_unpack(binvec.data(), data3.size(), act_str.data());
|
||||
|
||||
ASSERT_EQ(data3, act_str);
|
||||
}
|
||||
|
||||
TEST_F(CompactObjectTest, IntSet) {
|
||||
|
@ -453,4 +465,62 @@ TEST_F(CompactObjectTest, JsonTypeWithPathTest) {
|
|||
}
|
||||
}
|
||||
|
||||
// Reference byte-at-a-time packer used as the baseline for the benchmarks below.
// Every full group of 8 input bytes produces 7 output bytes; a trailing group of
// fewer than 8 bytes is copied verbatim.
static void ascii_pack_naive(const char* ascii, size_t len, uint8_t* bin) {
|
||||
const char* end = ascii + len;
|
||||
|
||||
unsigned i = 0;
|
||||
while (ascii + 8 <= end) {
|
||||
for (i = 0; i < 7; ++i) {
|
||||
// Output byte i = remaining high bits of input byte i, topped up with the low bits of byte i+1.
*bin++ = (ascii[0] >> i) | (ascii[1] << (7 - i));
|
||||
++ascii;
|
||||
}
|
||||
// The 8th input byte was fully consumed by the last output byte of the group.
++ascii;
|
||||
}
|
||||
|
||||
// epilog - we do not pack since we have less than 8 bytes.
|
||||
while (ascii < end) {
|
||||
*bin++ = *ascii++;
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmarks ascii_pack_naive on a 1024-byte string of 'a' characters.
static void BM_PackNaive(benchmark::State& state) {
|
||||
string val(1024, 'a');
|
||||
uint8_t buf[1024];
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
ascii_pack_naive(val.data(), val.size(), buf);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_PackNaive);
|
||||
|
||||
// Benchmarks detail::ascii_pack on a 1024-byte string of 'a' characters.
static void BM_Pack(benchmark::State& state) {
|
||||
string val(1024, 'a');
|
||||
uint8_t buf[1024];
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
detail::ascii_pack(val.data(), val.size(), buf);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_Pack);
|
||||
|
||||
static void BM_Pack2(benchmark::State& state) {
|
||||
string val(1024, 'a');
|
||||
uint8_t buf[1024];
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
detail::ascii_pack(val.data(), val.size(), buf);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_Pack2);
|
||||
|
||||
// Benchmarks detail::ascii_pack_simd on a 1024-byte string of 'a' characters.
static void BM_PackSimd(benchmark::State& state) {
|
||||
string val(1024, 'a');
|
||||
uint8_t buf[1024];
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
detail::ascii_pack_simd(val.data(), val.size(), buf);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_PackSimd);
|
||||
|
||||
} // namespace dfly
|
||||
|
|
209
src/core/detail/bitpacking.cc
Normal file
209
src/core/detail/bitpacking.cc
Normal file
|
@ -0,0 +1,209 @@
|
|||
// Copyright 2022, Roman Gershman. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#include "src/core/detail/bitpacking.h"
|
||||
|
||||
#include "base/logging.h"
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#include "base/sse2neon.h"
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
#include <absl/base/internal/endian.h>
|
||||
|
||||
namespace dfly {
|
||||
|
||||
namespace detail {
|
||||
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC optimize("Ofast")
|
||||
#endif
|
||||
|
||||
// Packs the low 7 bits of each of the 8 bytes of x into the low 56 bits of the result.
// Bit 7 of every input byte is masked away and the top byte of the result is zero.
static inline uint64_t Compress8x7bit(uint64_t x) {
|
||||
// Step 1: in every 16-bit pair, shift the upper 7-bit field down by 1 -> 14-bit fields.
x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F);
|
||||
// Step 2: in every 32-bit half, shift the upper 14-bit field down by 2 -> 28-bit fields.
x = ((x & 0x3FFF00003FFF0000) >> 2) | (x & 0x00003FFF00003FFF);
|
||||
// Step 3: shift the upper 28-bit field down by 4 -> one contiguous 56-bit field.
x = ((x & 0x0FFFFFFF00000000) >> 4) | (x & 0x000000000FFFFFFF);
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
// Daniel Lemire's function validate_ascii_fast() - under Apache/MIT license.
|
||||
// See https://github.com/lemire/fastvalidate-utf-8/
|
||||
// The function returns true (1) if all chars passed in src are
|
||||
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
// An empty input (len == 0) yields true.
|
||||
bool validate_ascii_fast(const char* src, size_t len) {
|
||||
size_t i = 0;
|
||||
// Accumulates the bitwise OR of all 16-byte chunks; a set top bit in any byte marks non-ascii.
__m128i has_error = _mm_setzero_si128();
|
||||
// The guard keeps `len - 16` from wrapping around for short inputs.
if (len >= 16) {
|
||||
for (; i <= len - 16; i += 16) {
|
||||
__m128i current_bytes = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
has_error = _mm_or_si128(has_error, current_bytes);
|
||||
}
|
||||
}
|
||||
// Gathers the top bit of each of the 16 accumulated bytes into an integer mask.
int error_mask = _mm_movemask_epi8(has_error);
|
||||
|
||||
// Remaining (< 16) bytes are OR-ed one by one.
char tail_has_error = 0;
|
||||
for (; i < len; i++) {
|
||||
tail_has_error |= src[i];
|
||||
}
|
||||
error_mask |= (tail_has_error & 0x80);
|
||||
|
||||
return !error_mask;
|
||||
}
|
||||
|
||||
// len must be at least 16
|
||||
void ascii_pack(const char* ascii, size_t len, uint8_t* bin) {
|
||||
uint64_t val;
|
||||
const char* end = ascii + len;
|
||||
|
||||
while (ascii + 8 <= end) {
|
||||
val = absl::little_endian::Load64(ascii);
|
||||
uint64_t dest = (val & 0xFF);
|
||||
for (unsigned i = 1; i <= 7; ++i) {
|
||||
val >>= 1;
|
||||
dest |= (val & (0x7FUL << 7 * i));
|
||||
}
|
||||
memcpy(bin, &dest, 7);
|
||||
bin += 7;
|
||||
ascii += 8;
|
||||
}
|
||||
|
||||
// epilog - we do not pack since we have less than 8 bytes.
|
||||
while (ascii < end) {
|
||||
*bin++ = *ascii++;
|
||||
}
|
||||
}
|
||||
|
||||
// Variant of ascii_pack that compresses each 8-byte group with Compress8x7bit
// (three mask-and-shift steps) instead of a per-byte loop.
// A trailing group of fewer than 8 bytes is copied verbatim.
void ascii_pack2(const char* ascii, size_t len, uint8_t* bin) {
|
||||
uint64_t val;
|
||||
const char* end = ascii + len;
|
||||
|
||||
while (ascii + 8 <= end) {
|
||||
val = absl::little_endian::Load64(ascii);
|
||||
val = Compress8x7bit(val);
|
||||
// Only the low 56 bits carry data, hence 7 bytes are copied.
// NOTE(review): taking the first 7 bytes of `val` assumes a little-endian host.
memcpy(bin, &val, 7);
|
||||
bin += 7;
|
||||
ascii += 8;
|
||||
}
|
||||
|
||||
// epilog - we do not pack since we have less than 8 bytes.
|
||||
while (ascii < end) {
|
||||
*bin++ = *ascii++;
|
||||
}
|
||||
}
|
||||
|
||||
// The algo - do in parallel what ascii_pack does on two uint64_t integers
|
||||
void ascii_pack_simd(const char* ascii, size_t len, uint8_t* bin) {
|
||||
__m128i val;
|
||||
|
||||
// I leave out 16 bytes in addition to 16 that we load in the loop
|
||||
// because we store into bin full 16 bytes instead of 14. To prevent data
|
||||
// overwrite we finish loop one iteration earlier.
|
||||
const char* end = ascii + len - 32;
|
||||
|
||||
// Skips 8th byte (indexc 7) in the lower 8-byte part.
|
||||
const __m128i control = _mm_set_epi8(-1, -1, 14, 13, 12, 11, 10, 9, 8, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
||||
__m128i rpart, lpart;
|
||||
|
||||
// Based on the question I asked here: https://stackoverflow.com/q/74831843/2280111
|
||||
while (ascii <= end) {
|
||||
val = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ascii));
|
||||
|
||||
/*
|
||||
x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F);
|
||||
x = ((x & 0x3FFF00003FFF0000) >> 2) | (x & 0x00003FFF00003FFF);
|
||||
x = ((x & 0x0FFFFFFF00000000) >> 4) | (x & 0x000000000FFFFFFF);
|
||||
*/
|
||||
|
||||
rpart = _mm_and_si128(val, _mm_set1_epi64x(0x007F007F007F007F));
|
||||
lpart = _mm_and_si128(val, _mm_set1_epi64x(0x7F007F007F007F00));
|
||||
val = _mm_or_si128(_mm_srli_epi64(lpart, 1), rpart);
|
||||
|
||||
rpart = _mm_and_si128(val, _mm_set1_epi64x(0x00003FFF00003FFF));
|
||||
lpart = _mm_and_si128(val, _mm_set1_epi64x(0x3FFF00003FFF0000));
|
||||
val = _mm_or_si128(_mm_srli_epi64(lpart, 2), rpart);
|
||||
|
||||
rpart = _mm_and_si128(val, _mm_set1_epi64x(0x000000000FFFFFFF));
|
||||
lpart = _mm_and_si128(val, _mm_set1_epi64x(0x0FFFFFFF00000000));
|
||||
val = _mm_or_si128(_mm_srli_epi64(lpart, 4), rpart);
|
||||
|
||||
val = _mm_shuffle_epi8(val, control);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(bin), val);
|
||||
bin += 14;
|
||||
ascii += 16;
|
||||
}
|
||||
|
||||
end += 32; // Bring back end.
|
||||
DCHECK(ascii < end);
|
||||
ascii_pack(ascii, end - ascii, bin);
|
||||
}
|
||||
|
||||
// unpacks 8->7 encoded blob back to ascii.
|
||||
// generally, we can not unpack inplace because ascii (dest) buffer is 8/7 bigger than
|
||||
// the source buffer.
|
||||
// however, if binary data is positioned on the right of the ascii buffer with empty space on the
|
||||
// left then we can unpack inplace.
// ascii_len is the length of the unpacked output; exactly ascii_len bytes are written to ascii.
|
||||
void ascii_unpack(const uint8_t* bin, size_t ascii_len, char* ascii) {
|
||||
constexpr uint8_t kM = 0x7F;
|
||||
// Previously read packed byte; its unused high bits belong to the next output char.
uint8_t p = 0;
|
||||
unsigned i = 0;
|
||||
|
||||
while (ascii_len >= 8) {
|
||||
for (i = 0; i < 7; ++i) {
|
||||
uint8_t src = *bin; // keep on stack in case we unpack inplace.
|
||||
// Output char i = leftover high bits of the previous byte | low bits of the current byte.
// For i == 0 the shift is by 8, so the previous byte contributes nothing.
*ascii++ = (p >> (8 - i)) | ((src << i) & kM);
|
||||
p = src;
|
||||
++bin;
|
||||
}
|
||||
|
||||
ascii_len -= 8;
|
||||
// The 8th char of the group is the top 7 bits of the 7th packed byte.
*ascii++ = p >> 1;
|
||||
}
|
||||
|
||||
DCHECK_LT(ascii_len, 8u);
|
||||
// The trailing (< 8) bytes were stored unpacked, so they are copied as is.
for (i = 0; i < ascii_len; ++i) {
|
||||
*ascii++ = *bin++;
|
||||
}
|
||||
}
|
||||
|
||||
// compares packed and unpacked strings. packed must be of length = binpacked_len(ascii_len).
// Returns true if packing `ascii` would produce exactly the bytes in `packed`.
// The ascii input is packed on the fly; no temporary buffer is used.
|
||||
bool compare_packed(const uint8_t* packed, const char* ascii, size_t ascii_len) {
|
||||
unsigned i = 0;
|
||||
bool res = true;
|
||||
const char* end = ascii + ascii_len;
|
||||
|
||||
while (ascii + 8 <= end) {
|
||||
for (i = 0; i < 7; ++i) {
|
||||
// Same expression the byte-wise packer uses to build output byte i of a group.
uint8_t conv = (ascii[0] >> i) | (ascii[1] << (7 - i));
|
||||
// The result is accumulated and checked once per 8-byte group rather than per byte.
res &= (conv == *packed);
|
||||
++ascii;
|
||||
++packed;
|
||||
}
|
||||
|
||||
if (!res)
|
||||
return false;
|
||||
|
||||
// Skip the 8th input byte; it was fully consumed by the last packed byte of the group.
++ascii;
|
||||
}
|
||||
|
||||
// The trailing (< 8) bytes are stored unpacked, so they are compared directly.
while (ascii < end) {
|
||||
if (*ascii++ != *packed++) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace dfly
|
36
src/core/detail/bitpacking.h
Normal file
36
src/core/detail/bitpacking.h
Normal file
|
@ -0,0 +1,36 @@
|
|||
// Copyright 2022, Roman Gershman. All rights reserved.
|
||||
// See LICENSE for licensing terms.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace dfly {
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Returns true if every one of the len bytes in src is a 7-bit value (0x00..0x7F).
bool validate_ascii_fast(const char* src, size_t len);
|
||||
|
||||
// unpacks 8->7 encoded blob back to ascii.
|
||||
// generally, we can not unpack inplace because ascii (dest) buffer is 8/7 bigger than
|
||||
// the source buffer.
|
||||
// however, if binary data is positioned on the right of the ascii buffer with empty space on the
|
||||
// left then we can unpack inplace.
|
||||
void ascii_unpack(const uint8_t* bin, size_t ascii_len, char* ascii);
|
||||
|
||||
// packs ascii string (does not verify) into binary form saving 1 bit per byte on average (12.5%).
|
||||
void ascii_pack(const char* ascii, size_t len, uint8_t* bin);
|
||||
// Same contract as ascii_pack; each 8-byte group is compressed with mask-and-shift steps.
void ascii_pack2(const char* ascii, size_t len, uint8_t* bin);
|
||||
|
||||
// Same contract as ascii_pack; processes 16 input bytes per iteration using SSE intrinsics.
void ascii_pack_simd(const char* ascii, size_t len, uint8_t* bin);
|
||||
// compares packed and unpacked strings. packed must be of length = binpacked_len(ascii_len).
bool compare_packed(const uint8_t* packed, const char* ascii, size_t ascii_len);
|
||||
|
||||
// maps ascii len to 7-bit packed length. Each 8 bytes are converted to 7 bytes.
|
||||
inline constexpr size_t binpacked_len(size_t ascii_len) {
|
||||
return (ascii_len * 7 + 7) / 8; /* rounded up */
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace dfly
|
|
@ -140,7 +140,7 @@ TEST_F(GenericFamilyTest, Rename) {
|
|||
int64_t val = CheckedInt({"get", "x"});
|
||||
ASSERT_EQ(kint64min, val); // does not exist
|
||||
|
||||
ASSERT_EQ(Run({"get", "b"}), x_val); // swapped.
|
||||
ASSERT_EQ(x_val, Run({"get", "b"})); // swapped.
|
||||
|
||||
EXPECT_EQ(CheckedInt({"exists", "x", "b"}), 1);
|
||||
|
||||
|
|
Loading…
Reference in a new issue