mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2024-12-14 11:58:02 +00:00
feat: add s390x architecture support (#1214)
* fix(lua): use native architecture when compiling lua for s390x.
Signed-off-by: iko1 <me@remotecpp.dev>
* feat(server): implement CompareFP for s390x architecture.
Signed-off-by: iko1 <me@remotecpp.dev>
* feat: implement validate_ascii_fast function variant for s390x arch.
Signed-off-by: iko1 <me@remotecpp.dev>
* fix: add comments before s390x vector operations
Signed-off-by: iko1 <me@remotecpp.dev>
* fix validate_ascii_fast function logic after CR comment
Signed-off-by: iko1 <me@remotecpp.dev>
* Revert "fix(lua): use native architecture when compiling lua for s390x."
This reverts commit 6cc5d8a8ed.
* fix(lua): use native architecture when compiling lua for s390x.
Signed-off-by: iko1 <me@remotecpp.dev>
* refactor validate_ascii_fast function after CR comment
Signed-off-by: iko1 <me@remotecpp.dev>
* include vecintrin.h from sse_port.h rather than the misleading filename
Signed-off-by: iko1 <me@remotecpp.dev>
---------
Signed-off-by: iko1 <me@remotecpp.dev>
This commit is contained in:
parent
6d4d740d6e
commit
19d7622280
4 changed files with 74 additions and 2 deletions
|
@ -12,10 +12,10 @@ index d42d14b7..75647e72 100644
|
|||
#define LUAI_MAXSTACK 15000
|
||||
#endif
|
||||
diff --git a/makefile b/makefile
|
||||
index d46e650c..e347e614 100644
|
||||
index d46e650c..c27e5677 100644
|
||||
--- a/makefile
|
||||
+++ b/makefile
|
||||
@@ -66,13 +66,23 @@ LOCAL = $(TESTS) $(CWARNS)
|
||||
@@ -66,13 +66,25 @@ LOCAL = $(TESTS) $(CWARNS)
|
||||
|
||||
|
||||
# enable Linux goodies
|
||||
|
@ -32,6 +32,8 @@ index d46e650c..e347e614 100644
|
|||
+OPTFLAGS= -march=sandybridge
|
||||
+else ifeq ($(uname_m), aarch64)
|
||||
+OPTFLAGS= -march=armv8.2-a+fp16+rcpc+dotprod+crypto
|
||||
+else ifeq ($(uname_m), s390x)
|
||||
+OPTFLAGS= -march=native
|
||||
+else
|
||||
+ $(error ERROR: unknown architecture $(uname_m))
|
||||
+endif
|
||||
|
|
|
@ -879,6 +879,34 @@ unsigned BucketBase<NUM_SLOTS, NUM_OVR>::UnsetStashPtr(uint8_t fp_hash, unsigned
|
|||
return res;
|
||||
}
|
||||
|
||||
#ifdef __s390x__
|
||||
// s390x variant of CompareFP, written with the vector intrinsics from <vecintrin.h>.
// Compares fp against 16 bytes loaded from finger_arr_ and returns a bitmask in which
// bit i is set when byte i equals fp.
template <unsigned NUM_SLOTS, unsigned NUM_OVR>
uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
  static_assert(FpArray{}.size() <= 16);

  // Load 16 bytes starting at finger_arr_.data() into a vector register.
  vector unsigned char stored = vec_load_len(finger_arr_.data(), 16);

  // Broadcast fp into every one of the 16 byte lanes.
  vector unsigned char needle;
  for (int lane = 0; lane < 16; ++lane) {
    needle[lane] = fp;
  }

  // Lane-wise equality: eq[i] := (needle[i] == stored[i]) ? 0xFF : 0.
  vector bool char eq = vec_cmpeq(needle, stored);

  // Collapse the per-lane results into one bit per lane (lane i -> bit i).
  uint32_t bits = 0;
  for (int lane = 0; lane < 16; ++lane) {
    bits |= static_cast<uint32_t>(eq[lane] != 0) << lane;
  }

  return bits;
}
|
||||
#else
|
||||
template <unsigned NUM_SLOTS, unsigned NUM_OVR>
|
||||
uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
|
||||
static_assert(FpArray{}.size() <= 16);
|
||||
|
@ -898,6 +926,7 @@ uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
|
|||
// Note: Last 2 operations can be combined in skylake with _mm_cmpeq_epi8_mask.
|
||||
return mask;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Bucket slot array goes from left to right: [x, x, ...]
|
||||
// Shift right vacates the first slot on the left by shifting all the elements right and
|
||||
|
|
|
@ -104,6 +104,42 @@ static inline pair<const char*, uint8_t*> simd_variant2_pack(const char* ascii,
|
|||
// See https://github.com/lemire/fastvalidate-utf-8/
|
||||
// The function returns true (1) if all chars passed in src are
|
||||
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
|
||||
#ifdef __s390x__
|
||||
// s390x variant, written with the vector intrinsics from <vecintrin.h>.
// Returns true if all `len` bytes starting at `src` are 7-bit values (0x00..0x7F),
// false otherwise. Full 16-byte chunks are checked with vector operations; the
// remaining tail (fewer than 16 bytes) is checked byte by byte.
bool validate_ascii_fast(const char* src, size_t len) {
  size_t i = 0;

  // Accumulator for the bitwise OR of every full 16-byte chunk; starts all-zero.
  vector unsigned char has_error = vec_splat_s8(0);

  // The guard keeps the unsigned expression `len - 16` from wrapping around.
  if (len >= 16) {
    for (; i <= len - 16; i += 16) {
      // Load 16 bytes from buffer into a vector.
      vector unsigned char current_bytes = vec_load_len((signed char*)(src + i), 16);

      // Accumulate with a plain OR so that a lane's high bit ends up set iff some
      // byte seen in that lane had its high bit set. vec_orc (OR with complement)
      // must not be used here: it ORs in ~current_bytes, which sets the high bit
      // for every valid ASCII byte and therefore inverts the check below.
      has_error = vec_or(has_error, current_bytes);
    }
  }

  // Vector in which every element is 0x80, the bit that marks a non-ASCII byte.
  vector unsigned char rep_invalid_values = vec_splat_s8(0x80);

  // AND-with-complement: 0x80 & ~has_error. A lane keeps the value 0x80 only if
  // no byte accumulated into that lane had its high bit set.
  vector unsigned char andc_result = vec_andc(rep_invalid_values, has_error);

  // If any lane differs from 0x80, at least one non-ASCII byte was seen.
  if (!vec_all_eq(rep_invalid_values, andc_result)) {
    return false;
  }

  // Check the tail that did not fill a whole 16-byte chunk.
  for (; i < len; i++) {
    if (src[i] & 0x80) {
      return false;
    }
  }

  return true;
}
|
||||
#else
|
||||
bool validate_ascii_fast(const char* src, size_t len) {
|
||||
size_t i = 0;
|
||||
__m128i has_error = _mm_setzero_si128();
|
||||
|
@ -123,6 +159,7 @@ bool validate_ascii_fast(const char* src, size_t len) {
|
|||
|
||||
return !error_mask;
|
||||
}
|
||||
#endif
|
||||
|
||||
// len must be at least 16
|
||||
void ascii_pack(const char* ascii, size_t len, uint8_t* bin) {
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#pragma once
|
||||
#if defined(__aarch64__)
|
||||
#include "base/sse2neon.h"
|
||||
#elif defined(__s390x__)
|
||||
#include <vecintrin.h>
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
|
@ -12,6 +14,7 @@
|
|||
|
||||
namespace dfly {
|
||||
|
||||
#ifndef __s390x__
|
||||
inline __m128i mm_loadu_si128(const __m128i* ptr) {
|
||||
#if defined(__aarch64__)
|
||||
__m128i res;
|
||||
|
@ -22,5 +25,6 @@ inline __m128i mm_loadu_si128(const __m128i* ptr) {
|
|||
return _mm_loadu_si128(ptr);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace dfly
|
||||
|
|
Loading…
Reference in a new issue