chore: introduce an optimized integer compare algorithm for lists (#3813)

* chore: introduce an optimized integer compare algorithm for lists Problem: when lists are long, OpRem spends a lot of time comparing an element with the records in the list. For integer-based lists, most of that time is spent in lpCompare. In addition, lpGet has many branches that penalize integer use cases. This PR: 1. Introduces lpGet2 (named lpGetInteger in this patch) - an improved version with fewer branches. 2. Benchmarks lpCompare vs an algorithm that compares records to an integer. 3. Benchmarks lpGet vs lpGet2. ``` ---------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------- BM_LpCompare 1187 ns 1187 ns 4715144 BM_LpCompareInt 371 ns 371 ns 15216611 BM_LpGet/1 265 ns 265 ns 21473149 BM_LpGet/2 214 ns 214 ns 26075164 ``` There are no functional changes to the Dragonfly code. Signed-off-by: Roman Gershman <roman@dragonflydb.io> * chore: fixes --------- Signed-off-by: Roman Gershman <roman@dragonflydb.io>
2024-12-15 17:51:06 +00:00 · 2024-09-29 09:04:01 +03:00 · 2024-09-29 09:04:01 +03:00 · a3cabd10d3
commit a3cabd10d3
parent 753a7808d3
3 changed files with 110 additions and 11 deletions
--- a/src/core/compact_object_test.cc
+++ b/src/core/compact_object_test.cc
@ -605,6 +605,34 @@ TEST_F(CompactObjectTest, RawInterface) {
  }
 }

+TEST_F(CompactObjectTest, lpGetInteger) {  // Cross-checks lpGetInteger against lpGet on a listpack mixing integers and strings.
+  int64_t val = -1;
+  uint8_t* lp = lpNew(0);
+  for (int j = 0; j < 60; ++j) {  // Appends 60 negative integers: -1, -2, -4, ... (doubling each step).
+    lp = lpAppendInteger(lp, val);
+    val *= 2;
+  }
+  val = 1;  // Not read again in this test.
+  for (int j = 0; j < 600; ++j) {  // Appends 600 strings of 'a' with lengths 0, 500, ..., 299500.
+    string str(j * 500, 'a');
+    lp = lpAppend(lp, reinterpret_cast<const uint8_t*>(str.data()), str.size());
+  }
+  uint8_t* ptr = lpFirst(lp);
+  while (ptr) {  // Walks every element front to back; the loop ends when lpNext yields a null pointer.
+    int64_t len1, len2;  // len1 is filled by lpGet, len2 by lpGetInteger.
+    uint8_t* val1 = lpGet(ptr, &len1, nullptr);
+    int res = lpGetInteger(ptr, &len2);
+    if (res) {  // lpGetInteger reported an integer: both APIs must agree on the value.
+      ASSERT_EQ(len1, len2);
+      ASSERT_TRUE(val1 == NULL);  // The test expects lpGet to return NULL here (no intbuf was passed).
+    } else {  // Not an integer: lpGet is expected to return a non-null pointer.
+      ASSERT_TRUE(val1 != NULL);
+    }
+    ptr = lpNext(lp, ptr);
+  }
+  lpFree(lp);
+}
+
 static void ascii_pack_naive(const char* ascii, size_t len, uint8_t* bin) {
  const char* end = ascii + len;

@ -764,11 +792,9 @@ static void BM_LpCompareInt(benchmark::State& state) {
    int64_t sz;
    while (elem) {
      DCHECK_NE(0xFF, *elem);
-      unsigned char* value = lpGet(elem, &sz, NULL);
-      if (!value) {
-        int res = sz == val;
-        benchmark::DoNotOptimize(res);
-      }
+      lpGetInteger(elem, &sz);
+      int res = sz == val;
+      benchmark::DoNotOptimize(res);
      elem = lpPrev(lp, elem);
    }
  }
@ -777,6 +803,7 @@ static void BM_LpCompareInt(benchmark::State& state) {
 BENCHMARK(BM_LpCompareInt);

 static void BM_LpGet(benchmark::State& state) {
+  unsigned version = state.range(0);
  uint8_t* lp = lpNew(0);
  int64_t val = -1;
  for (unsigned i = 0; i < 60; ++i) {
@ -786,15 +813,23 @@ static void BM_LpGet(benchmark::State& state) {

  while (state.KeepRunning()) {
    uint8_t* elem = lpLast(lp);
-    int64_t sz;
-    while (elem) {
-      unsigned char* value = lpGet(elem, &sz, NULL);
-      CHECK(!value && sz < 0);
-      elem = lpPrev(lp, elem);
+    int64_t ival;
+    if (version == 1) {
+      while (elem) {
+        unsigned char* value = lpGet(elem, &ival, NULL);
+        benchmark::DoNotOptimize(value);
+        elem = lpPrev(lp, elem);
+      }
+    } else {
+      while (elem) {
+        int res = lpGetInteger(elem, &ival);
+        benchmark::DoNotOptimize(res);
+        elem = lpPrev(lp, elem);
+      }
    }
  }
  lpFree(lp);
 }
-BENCHMARK(BM_LpGet);
+BENCHMARK(BM_LpGet)->Arg(1)->Arg(2);

 }  // namespace dfly
--- a/src/redis/listpack.c
+++ b/src/redis/listpack.c
@ -649,6 +649,66 @@ lpGetWithSize(unsigned char *p, int64_t *count, unsigned char *intbuf, uint64_t
    }
 }

+int lpGetInteger(unsigned char *p, int64_t *ival) { /* Decodes the element at p: stores the integer in *ival and returns 1, or returns 0 without writing *ival when p[0] is not one of the integer encodings handled below. */
+    int64_t val;
+    uint64_t uval = 0, negstart = UINT64_MAX, negmax = 0; /* negstart: first raw value treated as negative; the UINT64_MAX default keeps the 7-bit case non-negative. */
+    uint8_t encoding = p[0]; /* The first byte selects the encoding. */
+    
+    // Prioritize checking for integers first.
+    if (encoding < LP_ENCODING_7BIT_UINT_MASK) {        
+        uval = encoding & 0x7f;    /* The value is the low 7 bits of the first byte itself. */
+    } else if (encoding > LP_ENCODING_32BIT_STR) { /* First bytes above LP_ENCODING_32BIT_STR are matched by exact value. */
+        switch (encoding) {
+            case LP_ENCODING_16BIT_INT:
+                uval = (uint64_t)p[1] | (uint64_t)p[2] << 8; /* Payload bytes are assembled least-significant byte first. */
+                negstart = (uint64_t)1<<15;
+                negmax = UINT16_MAX;
+                break;
+            case LP_ENCODING_24BIT_INT:
+                uval = (uint64_t)p[1] | (uint64_t)p[2] << 8 | (uint64_t)p[3] << 16;
+                negstart = (uint64_t)1<<23;
+                negmax = UINT32_MAX>>8; /* 2^24 - 1 */
+                break;
+            case LP_ENCODING_32BIT_INT:
+                uval = (uint64_t)p[1] | (uint64_t)p[2] << 8 | (uint64_t)p[3] << 16 | (uint64_t)p[4] << 24;
+                negstart = (uint64_t)1<<31;
+                negmax = UINT32_MAX;
+                break;
+            case LP_ENCODING_64BIT_INT:                
+                uval = (uint64_t)p[1] | (uint64_t)p[2] << 8 | (uint64_t)p[3] << 16 | (uint64_t)p[4] << 24 |
+               (uint64_t)p[5] << 32 | (uint64_t)p[6] << 40 | (uint64_t)p[7] << 48 | (uint64_t)p[8] << 56;
+                negstart = (uint64_t)1<<63;
+                negmax = UINT64_MAX;
+            break;
+            default: /* Any other first byte in this range is not decoded as an integer. */
+                return 0;
+        }
+    } else if (encoding < LP_ENCODING_13BIT_INT_MASK && encoding >= LP_ENCODING_6BIT_STR_MASK) { /* 13-bit integer: 5 high bits in the first byte, 8 low bits in p[1]. */
+   	    uval = ((encoding & 0x1f) << 8) | p[1];
+        negstart = (uint64_t)1 << 12;
+        negmax = 8191;        /* 2^13 - 1 */
+    } else {
+        // string encodings.
+        return 0;
+    }
+
+     /* We reach this code path only for integer encodings.
+     * Convert the unsigned value to the signed one using two's complement
+     * rule. */
+    if (uval >= negstart) {
+        /* This three-step conversion should avoid undefined behaviors
+         * in the unsigned -> signed conversion. */
+        uval = negmax-uval; /* Magnitude minus one, e.g. an all-ones raw value gives 0. */
+        val = uval;
+        val = -val-1; /* ...so an all-ones raw value decodes to -1. */
+    } else {
+        val = uval;
+    }
+    
+    *ival = val;
+    return 1;    
+}
+
 unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {
    return lpGetWithSize(p, count, intbuf, NULL);
 }
--- a/src/redis/listpack.h
+++ b/src/redis/listpack.h
@ -73,6 +73,10 @@ unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num);
 unsigned char *lpMerge(unsigned char **first, unsigned char **second);
 unsigned long lpLength(unsigned char *lp);
 unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf);
+
+// Fills *ival and returns 1 if the item is an integer; returns 0 (leaving *ival untouched) otherwise.
+int lpGetInteger(unsigned char *p, int64_t *ival);
+
 unsigned char *lpGetValue(unsigned char *p, unsigned int *slen, long long *lval);
 unsigned char *lpFind(unsigned char *lp, unsigned char *p, unsigned char *s, uint32_t slen, unsigned int skip);
 unsigned char *lpFirst(unsigned char *lp);