1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-14 11:58:02 +00:00

feat: add s390x architecture support (#1214)

* fix(lua): use native architecture when compiling lua for s390x.

Signed-off-by: iko1 <me@remotecpp.dev>

* feat(server): implement CompareFP for s390x architecture.

Signed-off-by: iko1 <me@remotecpp.dev>

* feat: implement validate_ascii_fast function variant for s390x arch.

Signed-off-by: iko1 <me@remotecpp.dev>

* fix: add comments before s390x vector operations

Signed-off-by: iko1 <me@remotecpp.dev>

* fix validate_ascii_fast function logic after CR comment

Signed-off-by: iko1 <me@remotecpp.dev>

* Revert "fix(lua): use native architecture when compiling lua for s390x."

This reverts commit 6cc5d8a8ed.

* fix(lua): use native architecture when compiling lua for s390x.

Signed-off-by: iko1 <me@remotecpp.dev>

* refactor validate_ascii_fast function after CR comment

Signed-off-by: iko1 <me@remotecpp.dev>

* include vecintrin.h from sse_port.h rather than the misleading filename

Signed-off-by: iko1 <me@remotecpp.dev>

---------

Signed-off-by: iko1 <me@remotecpp.dev>
This commit is contained in:
iko1 2023-06-18 20:33:22 +02:00 committed by GitHub
parent 6d4d740d6e
commit 19d7622280
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 74 additions and 2 deletions

View file

@ -12,10 +12,10 @@ index d42d14b7..75647e72 100644
#define LUAI_MAXSTACK 15000
#endif
diff --git a/makefile b/makefile
index d46e650c..e347e614 100644
index d46e650c..c27e5677 100644
--- a/makefile
+++ b/makefile
@@ -66,13 +66,23 @@ LOCAL = $(TESTS) $(CWARNS)
@@ -66,13 +66,25 @@ LOCAL = $(TESTS) $(CWARNS)
# enable Linux goodies
@ -32,6 +32,8 @@ index d46e650c..e347e614 100644
+OPTFLAGS= -march=sandybridge
+else ifeq ($(uname_m), aarch64)
+OPTFLAGS= -march=armv8.2-a+fp16+rcpc+dotprod+crypto
+else ifeq ($(uname_m), s390x)
+OPTFLAGS= -march=native
+else
+ $(error ERROR: unknown architecture $(uname_m))
+endif

View file

@ -879,6 +879,34 @@ unsigned BucketBase<NUM_SLOTS, NUM_OVR>::UnsetStashPtr(uint8_t fp_hash, unsigned
return res;
}
#ifdef __s390x__
// s390x flavor of CompareFP, written with the z/Architecture vector intrinsics.
// Returns a bitmask in which bit i is set iff byte lane i of the vector loaded from
// finger_arr_ equals fp.
template <unsigned NUM_SLOTS, unsigned NUM_OVR>
uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
  static_assert(FpArray{}.size() <= 16);

  // Broadcast fp into every one of the 16 byte lanes.
  const vector unsigned char needle = vec_splats(fp);

  // Fetch the fingerprint bytes into a vector register.
  const vector unsigned char fingerprints = vec_load_len(finger_arr_.data(), 16);

  // Lane-wise equality: a lane is non-zero where the fingerprint matches fp, zero otherwise.
  const vector bool char eq_lanes = vec_cmpeq(needle, fingerprints);

  // Fold the per-lane results into one bit per lane (lane i -> bit i).
  uint32_t matches = 0;
  for (unsigned lane = 0; lane < 16; ++lane) {
    matches |= static_cast<uint32_t>(eq_lanes[lane] != 0) << lane;
  }
  return matches;
}
#else
template <unsigned NUM_SLOTS, unsigned NUM_OVR>
uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
static_assert(FpArray{}.size() <= 16);
@ -898,6 +926,7 @@ uint32_t BucketBase<NUM_SLOTS, NUM_OVR>::CompareFP(uint8_t fp) const {
// Note: Last 2 operations can be combined in skylake with _mm_cmpeq_epi8_mask.
return mask;
}
#endif
// Bucket slot array goes from left to right: [x, x, ...]
// Shift right vacates the first slot on the left by shifting all the elements right and

View file

@ -104,6 +104,42 @@ static inline pair<const char*, uint8_t*> simd_variant2_pack(const char* ascii,
// See https://github.com/lemire/fastvalidate-utf-8/
// The function returns true (1) if all chars passed in src are
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
#ifdef __s390x__
bool validate_ascii_fast(const char* src, size_t len) {
size_t i = 0;
// Initialize a vector in which all the elements are set to zero.
vector unsigned char has_error = vec_splat_s8(0);
if (len >= 16) {
for (; i <= len - 16; i += 16) {
// Load 16 bytes from buffer into a vector.
vector unsigned char current_bytes = vec_load_len((signed char*)(src + i), 16);
// Perform a bitwise OR operation between the current and the previously loaded contents.
has_error = vec_orc(has_error, current_bytes);
}
}
// Initialize a vector in which all the elements are set to an invalid ASCII value.
vector unsigned char rep_invalid_values = vec_splat_s8(0x80);
// Perform bitwise AND-complement operation between two vectors.
vector unsigned char andc_result = vec_andc(rep_invalid_values, has_error);
// Tests whether any of corresponding elements of the given vectors are not equal.
// After the bitwise operation, both vectors should be equal if ASCII values.
if (!vec_all_eq(rep_invalid_values, andc_result)) {
return false;
}
for (; i < len; i++) {
if (src[i] & 0x80) {
return false;
}
}
return true;
}
#else
bool validate_ascii_fast(const char* src, size_t len) {
size_t i = 0;
__m128i has_error = _mm_setzero_si128();
@ -123,6 +159,7 @@ bool validate_ascii_fast(const char* src, size_t len) {
return !error_mask;
}
#endif
// len must be at least 16
void ascii_pack(const char* ascii, size_t len, uint8_t* bin) {

View file

@ -5,6 +5,8 @@
#pragma once
#if defined(__aarch64__)
#include "base/sse2neon.h"
#elif defined(__s390x__)
#include <vecintrin.h>
#else
#include <emmintrin.h>
#include <tmmintrin.h>
@ -12,6 +14,7 @@
namespace dfly {
#ifndef __s390x__
inline __m128i mm_loadu_si128(const __m128i* ptr) {
#if defined(__aarch64__)
__m128i res;
@ -22,5 +25,6 @@ inline __m128i mm_loadu_si128(const __m128i* ptr) {
return _mm_loadu_si128(ptr);
#endif
}
#endif
} // namespace dfly