diff --git a/tests/dragonfly/seeder/__init__.py b/tests/dragonfly/seeder/__init__.py index aeea29c40..01aefdb7e 100644 --- a/tests/dragonfly/seeder/__init__.py +++ b/tests/dragonfly/seeder/__init__.py @@ -19,6 +19,7 @@ class SeederBase: UID_COUNTER = 1 # multiple generators should not conflict on keys CACHED_SCRIPTS = {} DEFAULT_TYPES = ["STRING", "LIST", "SET", "HASH", "ZSET", "JSON"] + BIG_VALUE_TYPES = ["LIST", "SET", "HASH", "ZSET"] def __init__(self, types: typing.Optional[typing.List[str]] = None): self.uid = SeederBase.UID_COUNTER @@ -137,6 +138,10 @@ class Seeder(SeederBase): data_size=100, collection_size=None, types: typing.Optional[typing.List[str]] = None, + huge_value_percentage=1, + huge_value_size=1024, + # 1 huge entry per container/key as default + huge_value_csize=1, ): SeederBase.__init__(self, types) self.key_target = key_target @@ -146,6 +151,10 @@ class Seeder(SeederBase): else: self.collection_size = collection_size + self.huge_value_percentage = huge_value_percentage + self.huge_value_size = huge_value_size + self.huge_value_csize = huge_value_csize + self.units = [ Seeder.Unit( prefix=f"k-s{self.uid}u{i}-", @@ -166,6 +175,9 @@ class Seeder(SeederBase): target_deviation if target_deviation is not None else -1, self.data_size, self.collection_size, + self.huge_value_percentage, + self.huge_value_size, + self.huge_value_csize, ] sha = await client.script_load(Seeder._load_script("generate")) @@ -196,8 +208,14 @@ class Seeder(SeederBase): unit.stop_key if using_stopkey else "", ] + args - unit.counter = await client.evalsha(sha, 0, *args) + result = await client.evalsha(sha, 0, *args) + result = result.split() + unit.counter = int(result[0]) + huge_keys = int(result[1]) + huge_entries = int(result[2]) - logging.debug( - f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}" - ) + msg = f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}" + if huge_keys > 0: + msg = f"{msg}. 
Total huge keys added {huge_keys} with {args[11]} elements each. Total extra modified huge entries {huge_entries}." + + logging.debug(msg) diff --git a/tests/dragonfly/seeder/script-generate.lua b/tests/dragonfly/seeder/script-generate.lua index d1f818425..32b2e1d4c 100644 --- a/tests/dragonfly/seeder/script-generate.lua +++ b/tests/dragonfly/seeder/script-generate.lua @@ -18,21 +18,48 @@ local total_ops = tonumber(ARGV[6]) local min_dev = tonumber(ARGV[7]) local data_size = tonumber(ARGV[8]) local collection_size = tonumber(ARGV[9]) +-- Probability of each key in key_target to be a big value +local huge_value_percentage = tonumber(ARGV[10]) +local huge_value_size = tonumber(ARGV[11]) +local huge_value_csize = tonumber(ARGV[12]) -- collect all keys belonging to this script -- assumes exclusive ownership local keys = LU_collect_keys(prefix, type) -LG_funcs.init(data_size, collection_size) +LG_funcs.init(data_size, collection_size, huge_value_percentage, huge_value_size, huge_value_csize) local addfunc = LG_funcs['add_' .. string.lower(type)] local modfunc = LG_funcs['mod_' .. string.lower(type)] +local huge_entries = LG_funcs["get_huge_entries"] + +local huge_keys = 0 + +local function huge_entry() + local ratio = LG_funcs.huge_value_percentage / 100 + -- [0, 1) + local rand = math.random() + local huge_entry = (ratio > rand) + return huge_entry +end local function action_add() local key = prefix .. 
tostring(key_counter) - key_counter = key_counter + 1 + local op_type = string.lower(type) + local is_huge = false + -- `string` and `json` huge entries are not supported so + -- we don't roll the dice to decide if they are huge or not + if op_type ~= "string" and op_type ~= "json" then + is_huge = huge_entry() + end + + key_counter = key_counter + 1 + if is_huge then + huge_keys = huge_keys + 1 + end - addfunc(key, keys) table.insert(keys, key) + keys[key] = is_huge + addfunc(key, keys) end local function action_mod() @@ -84,7 +111,8 @@ while true do -- update probability only every 10 iterations if counter % 10 == 0 then -- calculate intensity (not normalized probabilities) - -- please see attached plots in PR to undertand convergence + -- please see attached plots in PR to understand convergence + -- https://github.com/dragonflydb/dragonfly/pull/2556 -- the add intensity is monotonically decreasing with keycount growing, -- the delete intensity is monotonically increasing with keycount growing, @@ -121,4 +149,4 @@ if stop_key ~= '' then redis.call('DEL', stop_key) end -return key_counter +return tostring(key_counter) .. " " .. tostring(huge_keys) .. " " .. 
tostring(huge_entries()) diff --git a/tests/dragonfly/seeder/script-genlib.lua b/tests/dragonfly/seeder/script-genlib.lua index 937f26fe4..fb497c17f 100644 --- a/tests/dragonfly/seeder/script-genlib.lua +++ b/tests/dragonfly/seeder/script-genlib.lua @@ -1,9 +1,36 @@ local LG_funcs = {} -function LG_funcs.init(dsize, csize) +function LG_funcs.init(dsize, csize, large_val_perc, large_val_sz, huge_value_csize) LG_funcs.dsize = dsize LG_funcs.csize = csize LG_funcs.esize = math.ceil(dsize / csize) + LG_funcs.huge_value_percentage = large_val_perc + LG_funcs.huge_value_size = large_val_sz + LG_funcs.huge_value_csize = huge_value_csize +end + +local huge_entries = 0 + +local function randstr(huge_entry) + local str + if huge_entry then + str = dragonfly.randstr(LG_funcs.huge_value_size) + huge_entries = huge_entries + 1 + else + str = dragonfly.randstr(LG_funcs.esize) + end + return str +end + +local function randstr_sequence(huge_entry) + local strs + if huge_entry then + strs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.huge_value_csize) + huge_entries = huge_entries + 1 + else + strs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize) + end + return strs end -- strings @@ -27,23 +54,24 @@ end -- lists -- store list of random blobs of default container/element sizes -function LG_funcs.add_list(key) - local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize) - redis.apcall('LPUSH', key, unpack(elements)) +function LG_funcs.add_list(key, keys) + local is_huge = keys[key] + redis.apcall('LPUSH', key, unpack(randstr_sequence(is_huge))) end -function LG_funcs.mod_list(key) +function LG_funcs.mod_list(key, keys) -- equally likely pops and pushes, we rely on the list size being large enough -- to "highly likely" not get emptied out by consequitve pops + local is_huge = keys[key] local action = math.random(1, 4) if action == 1 then redis.apcall('RPOP', key) elseif action == 2 then redis.apcall('LPOP', key) elseif action == 3 then - redis.apcall('LPUSH', key, 
dragonfly.randstr(LG_funcs.esize)) + redis.apcall('LPUSH', key, randstr(is_huge)) else - redis.apcall('RPUSH', key, dragonfly.randstr(LG_funcs.esize)) + redis.apcall('RPUSH', key, randstr(is_huge)) end end @@ -62,17 +90,18 @@ function LG_funcs.add_set(key, keys) end redis.apcall('SDIFFSTORE', key, keys[i1], keys[i2]) else - local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize) - redis.apcall('SADD', key, unpack(elements)) + local is_huge = keys[key] + redis.apcall('SADD', key, unpack(randstr_sequence(is_huge))) end end -function LG_funcs.mod_set(key) +function LG_funcs.mod_set(key, keys) -- equally likely pops and additions if math.random() < 0.5 then redis.apcall('SPOP', key) else - redis.apcall('SADD', key, dragonfly.randstr(LG_funcs.esize)) + local is_huge = keys[key] + redis.apcall('SADD', key, randstr(is_huge)) end end @@ -81,8 +110,16 @@ end -- store {to_string(i): value for i in [1, csize]}, -- where `value` is a random string for even indices and a number for odd indices -function LG_funcs.add_hash(key) - local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2) +function LG_funcs.add_hash(key, keys) + local blobs + local is_huge = keys[key] + if is_huge then + blobs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize / 2) + huge_entries = huge_entries + 1 + else + blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2) + end + local htable = {} for i = 1, LG_funcs.csize, 2 do htable[i * 2 - 1] = tostring(i) @@ -95,12 +132,13 @@ function LG_funcs.add_hash(key) redis.apcall('HSET', key, unpack(htable)) end -function LG_funcs.mod_hash(key) +function LG_funcs.mod_hash(key, keys) local idx = math.random(LG_funcs.csize) if idx % 2 == 1 then redis.apcall('HINCRBY', key, tostring(idx), 1) else - redis.apcall('HSET', key, tostring(idx), dragonfly.randstr(LG_funcs.esize)) + local is_huge = keys[key] + redis.apcall('HSET', key, tostring(idx), randstr(is_huge)) end end @@ -108,19 +146,28 @@ end function LG_funcs.add_zset(key, 
keys) -- TODO: We don't support ZDIFFSTORE - local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize) + local is_huge = keys[key] + local blobs = randstr_sequence(is_huge) + local ztable = {} - for i = 1, LG_funcs.csize do + + local limit = LG_funcs.csize + if is_huge then + limit = LG_funcs.huge_value_csize + end + + for i = 1, limit do ztable[i * 2 - 1] = tostring(i) ztable[i * 2] = blobs[i] end redis.apcall('ZADD', key, unpack(ztable)) end -function LG_funcs.mod_zset(key, dbsize) +function LG_funcs.mod_zset(key, keys) local action = math.random(1, 4) if action <= 2 then - redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), dragonfly.randstr(LG_funcs.esize)) + local is_huge = keys[key] + redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), randstr(is_huge)) elseif action == 3 then redis.apcall('ZPOPMAX', key) else @@ -153,3 +200,7 @@ function LG_funcs.mod_json(key, dbsize) redis.apcall('JSON.NUMINCRBY', key, '$.counters[' .. math.random(LG_funcs.csize ) .. ']', 1) end end + +function LG_funcs.get_huge_entries() + return huge_entries +end diff --git a/tests/dragonfly/seeder_test.py b/tests/dragonfly/seeder_test.py index 5086cc399..3c1913f88 100644 --- a/tests/dragonfly/seeder_test.py +++ b/tests/dragonfly/seeder_test.py @@ -29,7 +29,15 @@ async def test_static_collection_size(async_client: aioredis.Redis): await async_client.flushall() - s = Seeder(units=1, key_target=10, data_size=10_000, collection_size=1, types=["LIST"]) + s = Seeder( + units=1, + key_target=10, + data_size=10_000, + collection_size=1, + types=["LIST"], + huge_value_percentage=0, + huge_value_size=0, + ) await s.run(async_client) await check_list()