
feat: add support for big values in SeederV2 (#4222)

* add support for big values in SeederV2

---------

Signed-off-by: kostas <kostas@dragonflydb.io>
Authored by Kostas Kyrimis <kostas@dragonflydb.io> on 2024-12-05 09:47:41 +01:00, committed by GitHub
parent ad73e18f6d
commit 7ccad66fb1
4 changed files with 134 additions and 29 deletions


@@ -19,6 +19,7 @@ class SeederBase:
     UID_COUNTER = 1  # multiple generators should not conflict on keys
     CACHED_SCRIPTS = {}
     DEFAULT_TYPES = ["STRING", "LIST", "SET", "HASH", "ZSET", "JSON"]
+    BIG_VALUE_TYPES = ["LIST", "SET", "HASH", "ZSET"]

     def __init__(self, types: typing.Optional[typing.List[str]] = None):
         self.uid = SeederBase.UID_COUNTER
@@ -137,6 +138,10 @@ class Seeder(SeederBase):
         data_size=100,
         collection_size=None,
         types: typing.Optional[typing.List[str]] = None,
+        huge_value_percentage=1,
+        huge_value_size=1024,
+        # 1 huge entry per container/key by default
+        huge_value_csize=1,
     ):
         SeederBase.__init__(self, types)
         self.key_target = key_target
@@ -146,6 +151,10 @@ class Seeder(SeederBase):
         else:
             self.collection_size = collection_size
+        self.huge_value_percentage = huge_value_percentage
+        self.huge_value_size = huge_value_size
+        self.huge_value_csize = huge_value_csize

         self.units = [
             Seeder.Unit(
                 prefix=f"k-s{self.uid}u{i}-",
@@ -166,6 +175,9 @@ class Seeder(SeederBase):
             target_deviation if target_deviation is not None else -1,
             self.data_size,
             self.collection_size,
+            self.huge_value_percentage,
+            self.huge_value_size,
+            self.huge_value_csize,
         ]

         sha = await client.script_load(Seeder._load_script("generate"))
@@ -196,8 +208,14 @@ class Seeder(SeederBase):
             unit.stop_key if using_stopkey else "",
         ] + args

-        unit.counter = await client.evalsha(sha, 0, *args)
+        result = await client.evalsha(sha, 0, *args)
+        result = result.split()
+        unit.counter = int(result[0])
+        huge_keys = int(result[1])
+        huge_entries = int(result[2])

-        logging.debug(
-            f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}"
-        )
+        msg = f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}"
+        if huge_keys > 0:
+            msg = f"{msg}. Total huge keys added {huge_keys} with {args[11]} elements each. Total extra modified huge entries {huge_entries}."
+        logging.debug(msg)
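
The generate script now replies with three space-separated counters instead of a bare integer, and the Python side splits and converts them as above. A minimal standalone sketch of that parsing contract (the raw string below is a made-up reply, not output from a real run):

    # Hypothetical reply from the generate script: "<key_counter> <huge_keys> <huge_entries>"
    raw = "1000 12 15"

    counter, huge_keys, huge_entries = (int(part) for part in raw.split())
    assert (counter, huge_keys, huge_entries) == (1000, 12, 15)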


@@ -18,21 +18,48 @@ local total_ops = tonumber(ARGV[6])
 local min_dev = tonumber(ARGV[7])
 local data_size = tonumber(ARGV[8])
 local collection_size = tonumber(ARGV[9])
+-- Probability of each key in key_target being a big value
+local huge_value_percentage = tonumber(ARGV[10])
+local huge_value_size = tonumber(ARGV[11])
+local huge_value_csize = tonumber(ARGV[12])

 -- collect all keys belonging to this script
 -- assumes exclusive ownership
 local keys = LU_collect_keys(prefix, type)

-LG_funcs.init(data_size, collection_size)
+LG_funcs.init(data_size, collection_size, huge_value_percentage, huge_value_size, huge_value_csize)
 local addfunc = LG_funcs['add_' .. string.lower(type)]
 local modfunc = LG_funcs['mod_' .. string.lower(type)]
+local huge_entries = LG_funcs["get_huge_entries"]

+local huge_keys = 0

+local function huge_entry()
+    local ratio = LG_funcs.huge_value_percentage / 100
+    -- [0, 1]
+    local rand = math.random()
+    local huge_entry = (ratio > rand)
+    return huge_entry
+end

 local function action_add()
     local key = prefix .. tostring(key_counter)
-    key_counter = key_counter + 1
+    local op_type = string.lower(type)
+    local is_huge = false
+    -- `string` and `json` huge entries are not supported, so
+    -- we don't roll the dice to decide whether they are huge
+    if op_type ~= "string" and op_type ~= "json" then
+        is_huge = huge_entry()
+    end

+    key_counter = key_counter + 1
+    if is_huge then
+        huge_keys = huge_keys + 1
+    end

-    addfunc(key, keys)
     table.insert(keys, key)
+    keys[key] = is_huge
+    addfunc(key, keys)
 end

 local function action_mod()
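
huge_entry() above is a plain Bernoulli trial over huge_value_percentage. The same roll in standalone Python (names mirror the script; the 1% default comes from the new Seeder signature):

    import random

    def huge_entry(huge_value_percentage: float = 1) -> bool:
        # A key is picked as huge with probability huge_value_percentage / 100,
        # mirroring the script's `ratio > rand` comparison.
        return huge_value_percentage / 100 > random.random()

    # With the 1% default, roughly 1 in 100 added keys becomes huge.
    rolls = sum(huge_entry() for _ in range(100_000))
    print(f"huge rolls out of 100k: {rolls}")  # ~1000 in expectation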
@@ -84,7 +111,8 @@ while true do
     -- update probability only every 10 iterations
     if counter % 10 == 0 then
         -- calculate intensity (not normalized probabilities)
-        -- please see attached plots in PR to undertand convergence
+        -- please see attached plots in PR to understand convergence
+        -- https://github.com/dragonflydb/dragonfly/pull/2556
         -- the add intensity is monotonically decreasing with keycount growing,
         -- the delete intensity is monotonically increasing with keycount growing,
@@ -121,4 +149,4 @@ if stop_key ~= '' then
     redis.call('DEL', stop_key)
 end

-return key_counter
+return tostring(key_counter) .. " " .. tostring(huge_keys) .. " " .. tostring(huge_entries())


@@ -1,9 +1,36 @@
 local LG_funcs = {}

-function LG_funcs.init(dsize, csize)
+function LG_funcs.init(dsize, csize, large_val_perc, large_val_sz, huge_value_csize)
     LG_funcs.dsize = dsize
     LG_funcs.csize = csize
     LG_funcs.esize = math.ceil(dsize / csize)
+    LG_funcs.huge_value_percentage = large_val_perc
+    LG_funcs.huge_value_size = large_val_sz
+    LG_funcs.huge_value_csize = huge_value_csize
 end

+local huge_entries = 0

+local function randstr(huge_entry)
+    local str
+    if huge_entry then
+        str = dragonfly.randstr(LG_funcs.huge_value_size)
+        huge_entries = huge_entries + 1
+    else
+        str = dragonfly.randstr(LG_funcs.esize)
+    end
+    return str
+end

+local function randstr_sequence(huge_entry)
+    local strs
+    if huge_entry then
+        strs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.huge_value_csize)
+        huge_entries = huge_entries + 1
+    else
+        strs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
+    end
+    return strs
+end

 -- strings
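
For reference, LG_funcs.esize fixes the per-element size so that csize regular elements sum to roughly dsize; huge entries bypass it and use huge_value_size directly. The arithmetic, checked in standalone Python:

    import math

    data_size, collection_size = 10_000, 100
    esize = math.ceil(data_size / collection_size)  # as in LG_funcs.init
    assert esize == 100
    assert esize * collection_size >= data_size  # a full container reaches at least data_size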
@@ -27,23 +54,24 @@ end
 -- lists
 -- store list of random blobs of default container/element sizes

-function LG_funcs.add_list(key)
-    local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
-    redis.apcall('LPUSH', key, unpack(elements))
+function LG_funcs.add_list(key, keys)
+    local is_huge = keys[key]
+    redis.apcall('LPUSH', key, unpack(randstr_sequence(is_huge)))
 end

-function LG_funcs.mod_list(key)
+function LG_funcs.mod_list(key, keys)
     -- equally likely pops and pushes, we rely on the list size being large enough
     -- to "highly likely" not get emptied out by consecutive pops
+    local is_huge = keys[key]
     local action = math.random(1, 4)
     if action == 1 then
         redis.apcall('RPOP', key)
     elseif action == 2 then
         redis.apcall('LPOP', key)
     elseif action == 3 then
-        redis.apcall('LPUSH', key, dragonfly.randstr(LG_funcs.esize))
+        redis.apcall('LPUSH', key, randstr(is_huge))
     else
-        redis.apcall('RPUSH', key, dragonfly.randstr(LG_funcs.esize))
+        redis.apcall('RPUSH', key, randstr(is_huge))
     end
 end
@@ -62,17 +90,18 @@ function LG_funcs.add_set(key, keys)
         end
         redis.apcall('SDIFFSTORE', key, keys[i1], keys[i2])
     else
-        local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
-        redis.apcall('SADD', key, unpack(elements))
+        local is_huge = keys[key]
+        redis.apcall('SADD', key, unpack(randstr_sequence(is_huge)))
     end
 end

-function LG_funcs.mod_set(key)
+function LG_funcs.mod_set(key, keys)
     -- equally likely pops and additions
     if math.random() < 0.5 then
         redis.apcall('SPOP', key)
     else
-        redis.apcall('SADD', key, dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]
+        redis.apcall('SADD', key, randstr(is_huge))
     end
 end
@@ -81,8 +110,16 @@ end
 -- store {to_string(i): value for i in [1, csize]},
 -- where `value` is a random string for even indices and a number for odd indices

-function LG_funcs.add_hash(key)
-    local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
+function LG_funcs.add_hash(key, keys)
+    local blobs
+    local is_huge = keys[key]
+    if is_huge then
+        blobs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize / 2)
+        huge_entries = huge_entries + 1
+    else
+        blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
+    end

     local htable = {}
     for i = 1, LG_funcs.csize, 2 do
         htable[i * 2 - 1] = tostring(i)
@@ -95,12 +132,13 @@ function LG_funcs.add_hash(key)
     redis.apcall('HSET', key, unpack(htable))
 end

-function LG_funcs.mod_hash(key)
+function LG_funcs.mod_hash(key, keys)
     local idx = math.random(LG_funcs.csize)
     if idx % 2 == 1 then
         redis.apcall('HINCRBY', key, tostring(idx), 1)
     else
-        redis.apcall('HSET', key, tostring(idx), dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]
+        redis.apcall('HSET', key, tostring(idx), randstr(is_huge))
     end
 end
@@ -108,19 +146,28 @@ end
 function LG_funcs.add_zset(key, keys)
     -- TODO: We don't support ZDIFFSTORE
-    local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
+    local is_huge = keys[key]
+    local blobs = randstr_sequence(is_huge)

     local ztable = {}
-    for i = 1, LG_funcs.csize do
+
+    local limit = LG_funcs.csize
+    if is_huge then
+        limit = LG_funcs.huge_value_csize
+    end
+
+    for i = 1, limit do
         ztable[i * 2 - 1] = tostring(i)
         ztable[i * 2] = blobs[i]
     end
     redis.apcall('ZADD', key, unpack(ztable))
 end

-function LG_funcs.mod_zset(key, dbsize)
+function LG_funcs.mod_zset(key, keys)
     local action = math.random(1, 4)
     if action <= 2 then
-        redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]
+        redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), randstr(is_huge))
     elseif action == 3 then
         redis.apcall('ZPOPMAX', key)
     else
@@ -153,3 +200,7 @@ function LG_funcs.mod_json(key, dbsize)
         redis.apcall('JSON.NUMINCRBY', key, '$.counters[' .. math.random(LG_funcs.csize) .. ']', 1)
     end
 end

+function LG_funcs.get_huge_entries()
+    return huge_entries
+end
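
The ztable loop in add_zset builds the flat score/member pair list that a variadic ZADD expects, capped at huge_value_csize pairs for huge keys. The same interleaving in standalone Python (build_zadd_args is an illustrative helper, not part of the seeder):

    def build_zadd_args(blobs: list[str]) -> list[str]:
        # ZADD key score1 member1 score2 member2 ...
        args = []
        for i, blob in enumerate(blobs, start=1):
            args.append(str(i))  # score, mirrors ztable[i * 2 - 1] = tostring(i)
            args.append(blob)    # member, mirrors ztable[i * 2] = blobs[i]
        return args

    print(build_zadd_args(["aa", "bb"]))  # ['1', 'aa', '2', 'bb']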


@@ -29,7 +29,15 @@ async def test_static_collection_size(async_client: aioredis.Redis):
     await async_client.flushall()

-    s = Seeder(units=1, key_target=10, data_size=10_000, collection_size=1, types=["LIST"])
+    s = Seeder(
+        units=1,
+        key_target=10,
+        data_size=10_000,
+        collection_size=1,
+        types=["LIST"],
+        huge_value_percentage=0,
+        huge_value_size=0,
+    )
     await s.run(async_client)
     await check_list()
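
For contrast, a run that exercises the new path could look like the sketch below; test_big_values is a hypothetical name and the parameter values are arbitrary picks, with only the keyword arguments coming from this diff:

    async def test_big_values(async_client: aioredis.Redis):  # hypothetical test
        s = Seeder(
            units=1,
            key_target=1_000,
            types=["LIST", "SET", "HASH", "ZSET"],  # matches BIG_VALUE_TYPES
            huge_value_percentage=10,  # ~10% of added keys become huge
            huge_value_size=4096,      # bytes per huge element
            huge_value_csize=1,        # huge elements per container
        )
        await s.run(async_client)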