feat: add support for big values in SeederV2 (#4222)

* add support for big values in SeederV2

Signed-off-by: kostas <kostas@dragonflydb.io>

Parent: ad73e18f6d
Commit: 7ccad66fb1

4 changed files with 134 additions and 29 deletions
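For context, a minimal usage sketch of the new knobs (values are illustrative; the parameter names and defaults come from the Seeder.__init__ change below, and the run() call mirrors the updated test): roughly huge_value_percentage percent of eligible keys become huge, each huge element carrying huge_value_size bytes and each huge container holding huge_value_csize elements.

    # Sketch, assuming `async_client` is an aioredis.Redis connected to a
    # Dragonfly instance; the values below are hypothetical, not prescriptive.
    s = Seeder(
        units=1,
        key_target=10_000,
        types=["LIST"],
        huge_value_percentage=1,  # percent chance that a key becomes huge
        huge_value_size=1024,     # bytes per huge element
        huge_value_csize=1,       # huge elements per container
    )
    await s.run(async_client)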
@@ -19,6 +19,7 @@ class SeederBase:
     UID_COUNTER = 1  # multiple generators should not conflict on keys
     CACHED_SCRIPTS = {}
     DEFAULT_TYPES = ["STRING", "LIST", "SET", "HASH", "ZSET", "JSON"]
+    BIG_VALUE_TYPES = ["LIST", "SET", "HASH", "ZSET"]

     def __init__(self, types: typing.Optional[typing.List[str]] = None):
         self.uid = SeederBase.UID_COUNTER
@@ -137,6 +138,10 @@ class Seeder(SeederBase):
         data_size=100,
         collection_size=None,
         types: typing.Optional[typing.List[str]] = None,
+        huge_value_percentage=1,
+        huge_value_size=1024,
+        # 1 huge entry per container/key as default
+        huge_value_csize=1,
     ):
         SeederBase.__init__(self, types)
         self.key_target = key_target
@@ -146,6 +151,10 @@ class Seeder(SeederBase):
         else:
             self.collection_size = collection_size

+        self.huge_value_percentage = huge_value_percentage
+        self.huge_value_size = huge_value_size
+        self.huge_value_csize = huge_value_csize
+
         self.units = [
             Seeder.Unit(
                 prefix=f"k-s{self.uid}u{i}-",
@@ -166,6 +175,9 @@ class Seeder(SeederBase):
             target_deviation if target_deviation is not None else -1,
             self.data_size,
             self.collection_size,
+            self.huge_value_percentage,
+            self.huge_value_size,
+            self.huge_value_csize,
         ]

         sha = await client.script_load(Seeder._load_script("generate"))
@@ -196,8 +208,14 @@ class Seeder(SeederBase):
             unit.stop_key if using_stopkey else "",
         ] + args

-        unit.counter = await client.evalsha(sha, 0, *args)
+        result = await client.evalsha(sha, 0, *args)
+        result = result.split()
+        unit.counter = int(result[0])
+        huge_keys = int(result[1])
+        huge_entries = int(result[2])

-        logging.debug(
-            f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}"
-        )
+        msg = f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}"
+        if huge_keys > 0:
+            msg = f"{msg}. Total huge keys added {huge_keys} with {args[11]} elements each. Total extra modified huge entries {huge_entries}."
+
+        logging.debug(msg)
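Note on the contract between the two sides: the generate script used to return a bare integer key counter; it now returns one space-separated string "<key_counter> <huge_keys> <huge_entries>" (see the return statement change below), which the Python caller splits and converts. A minimal sketch of that parsing, assuming `client`, `sha`, and `args` as in the diff above:

    # Sketch of the new script return-value contract.
    result = await client.evalsha(sha, 0, *args)  # e.g. "1000 12 12"
    counter, huge_keys, huge_entries = (int(x) for x in result.split())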
@@ -18,21 +18,48 @@ local total_ops = tonumber(ARGV[6])
 local min_dev = tonumber(ARGV[7])
 local data_size = tonumber(ARGV[8])
 local collection_size = tonumber(ARGV[9])
+-- Probability of each key in key_target to be a big value
+local huge_value_percentage = tonumber(ARGV[10])
+local huge_value_size = tonumber(ARGV[11])
+local huge_value_csize = tonumber(ARGV[12])

 -- collect all keys belonging to this script
 -- assumes exclusive ownership
 local keys = LU_collect_keys(prefix, type)

-LG_funcs.init(data_size, collection_size)
+LG_funcs.init(data_size, collection_size, huge_value_percentage, huge_value_size, huge_value_csize)
 local addfunc = LG_funcs['add_' .. string.lower(type)]
 local modfunc = LG_funcs['mod_' .. string.lower(type)]
+local huge_entries = LG_funcs["get_huge_entries"]
+
+local huge_keys = 0
+
+local function huge_entry()
+    local ratio = LG_funcs.huge_value_percentage / 100
+    -- [0, 1]
+    local rand = math.random()
+    local huge_entry = (ratio > rand)
+    return huge_entry
+end

 local function action_add()
     local key = prefix .. tostring(key_counter)
-    key_counter = key_counter + 1
+    local op_type = string.lower(type)
+    local is_huge = false
+    -- `string` and `json` huge entries are not supported, so
+    -- we don't roll a die to decide if they are huge or not
+    if op_type ~= "string" and op_type ~= "json" then
+        is_huge = huge_entry()
+    end
+
+    key_counter = key_counter + 1
+    if is_huge then
+        huge_keys = huge_keys + 1
+    end

-    addfunc(key, keys)
     table.insert(keys, key)
+    keys[key] = is_huge
+    addfunc(key, keys)
 end

 local function action_mod()
@@ -84,7 +111,8 @@ while true do
     -- update probability only every 10 iterations
     if counter % 10 == 0 then
         -- calculate intensity (not normalized probabilities)
-        -- please see attached plots in PR to undertand convergence
+        -- please see attached plots in PR to understand convergence
         -- https://github.com/dragonflydb/dragonfly/pull/2556
+
         -- the add intensity is monotonically decreasing with keycount growing,
         -- the delete intensity is monotonically increasing with keycount growing,
@@ -121,4 +149,4 @@ if stop_key ~= '' then
     redis.call('DEL', stop_key)
 end

-return key_counter
+return tostring(key_counter) .. " " .. tostring(huge_keys) .. " " .. tostring(huge_entries())
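The huge_entry() helper above is a plain Bernoulli trial: with huge_value_percentage interpreted as a percent, each eligible (non-STRING, non-JSON) key becomes huge with probability huge_value_percentage / 100. An equivalent Python sketch:

    import random

    def huge_entry(huge_value_percentage: float) -> bool:
        # True with probability huge_value_percentage / 100
        return huge_value_percentage / 100 > random.random()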
@@ -1,9 +1,36 @@
 local LG_funcs = {}

-function LG_funcs.init(dsize, csize)
+function LG_funcs.init(dsize, csize, large_val_perc, large_val_sz, huge_value_csize)
     LG_funcs.dsize = dsize
     LG_funcs.csize = csize
     LG_funcs.esize = math.ceil(dsize / csize)
+    LG_funcs.huge_value_percentage = large_val_perc
+    LG_funcs.huge_value_size = large_val_sz
+    LG_funcs.huge_value_csize = huge_value_csize
 end

+local huge_entries = 0
+
+local function randstr(huge_entry)
+    local str
+    if huge_entry then
+        str = dragonfly.randstr(LG_funcs.huge_value_size)
+        huge_entries = huge_entries + 1
+    else
+        str = dragonfly.randstr(LG_funcs.esize)
+    end
+    return str
+end
+
+local function randstr_sequence(huge_entry)
+    local strs
+    if huge_entry then
+        strs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.huge_value_csize)
+        huge_entries = huge_entries + 1
+    else
+        strs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
+    end
+    return strs
+end
+
 -- strings
@@ -27,23 +54,24 @@ end
 -- lists
 -- store list of random blobs of default container/element sizes

-function LG_funcs.add_list(key)
-    local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
-    redis.apcall('LPUSH', key, unpack(elements))
+function LG_funcs.add_list(key, keys)
+    local is_huge = keys[key]
+    redis.apcall('LPUSH', key, unpack(randstr_sequence(is_huge)))
 end

-function LG_funcs.mod_list(key)
+function LG_funcs.mod_list(key, keys)
     -- equally likely pops and pushes, we rely on the list size being large enough
     -- to "highly likely" not get emptied out by consecutive pops
+    local is_huge = keys[key]
     local action = math.random(1, 4)
     if action == 1 then
         redis.apcall('RPOP', key)
     elseif action == 2 then
         redis.apcall('LPOP', key)
     elseif action == 3 then
-        redis.apcall('LPUSH', key, dragonfly.randstr(LG_funcs.esize))
+        redis.apcall('LPUSH', key, randstr(is_huge))
     else
-        redis.apcall('RPUSH', key, dragonfly.randstr(LG_funcs.esize))
+        redis.apcall('RPUSH', key, randstr(is_huge))
     end
 end
@@ -62,17 +90,18 @@ function LG_funcs.add_set(key, keys)
         end
         redis.apcall('SDIFFSTORE', key, keys[i1], keys[i2])
     else
-        local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
-        redis.apcall('SADD', key, unpack(elements))
+        local is_huge = keys[key]
+        redis.apcall('SADD', key, unpack(randstr_sequence(is_huge)))
     end
 end

-function LG_funcs.mod_set(key)
+function LG_funcs.mod_set(key, keys)
     -- equally likely pops and additions
     if math.random() < 0.5 then
         redis.apcall('SPOP', key)
     else
-        redis.apcall('SADD', key, dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]
+        redis.apcall('SADD', key, randstr(is_huge))
     end
 end
@@ -81,8 +110,16 @@ end
 -- store {to_string(i): value for i in [1, csize]},
 -- where `value` is a random string for even indices and a number for odd indices

-function LG_funcs.add_hash(key)
-    local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
+function LG_funcs.add_hash(key, keys)
+    local blobs
+    local is_huge = keys[key]
+    if is_huge then
+        blobs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize / 2)
+        huge_entries = huge_entries + 1
+    else
+        blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
+    end
+
     local htable = {}
     for i = 1, LG_funcs.csize, 2 do
         htable[i * 2 - 1] = tostring(i)
@@ -95,12 +132,13 @@ function LG_funcs.add_hash(key)
     redis.apcall('HSET', key, unpack(htable))
 end

-function LG_funcs.mod_hash(key)
+function LG_funcs.mod_hash(key, keys)
     local idx = math.random(LG_funcs.csize)
     if idx % 2 == 1 then
         redis.apcall('HINCRBY', key, tostring(idx), 1)
     else
-        redis.apcall('HSET', key, tostring(idx), dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]
+        redis.apcall('HSET', key, tostring(idx), randstr(is_huge))
     end
 end
@@ -108,19 +146,28 @@ end

 function LG_funcs.add_zset(key, keys)
     -- TODO: We don't support ZDIFFSTORE
-    local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
+    local is_huge = keys[key]
+    local blobs = randstr_sequence(is_huge)
+
     local ztable = {}
-    for i = 1, LG_funcs.csize do
+
+    local limit = LG_funcs.csize
+    if is_huge then
+        limit = LG_funcs.huge_value_csize
+    end
+
+    for i = 1, limit do
         ztable[i * 2 - 1] = tostring(i)
         ztable[i * 2] = blobs[i]
     end
     redis.apcall('ZADD', key, unpack(ztable))
 end

-function LG_funcs.mod_zset(key, dbsize)
+function LG_funcs.mod_zset(key, keys)
     local action = math.random(1, 4)
     if action <= 2 then
-        redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]
+        redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), randstr(is_huge))
     elseif action == 3 then
         redis.apcall('ZPOPMAX', key)
     else
@@ -153,3 +200,7 @@ function LG_funcs.mod_json(key, dbsize)
         redis.apcall('JSON.NUMINCRBY', key, '$.counters[' .. math.random(LG_funcs.csize ) .. ']', 1)
     end
 end
+
+function LG_funcs.get_huge_entries()
+    return huge_entries
+end
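Sizing recap for the generator functions above: LG_funcs.esize = ceil(dsize / csize) keeps a normal container's total payload near data_size bytes, while a huge entry swaps in huge_value_size bytes per element and, for sequences and zsets, huge_value_csize elements per container. A worked example in Python (collection_size is hypothetical; the other values are this commit's Seeder defaults):

    import math

    data_size, collection_size = 100, 10            # collection_size chosen for illustration
    esize = math.ceil(data_size / collection_size)  # 10 bytes per normal element
    huge_value_size, huge_value_csize = 1024, 1     # defaults added by this commit
    # normal container: ~10 elements of 10 bytes each (~100 bytes total)
    # huge container:   1 element of 1024 bytes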
@@ -29,7 +29,15 @@ async def test_static_collection_size(async_client: aioredis.Redis):

     await async_client.flushall()

-    s = Seeder(units=1, key_target=10, data_size=10_000, collection_size=1, types=["LIST"])
+    s = Seeder(
+        units=1,
+        key_target=10,
+        data_size=10_000,
+        collection_size=1,
+        types=["LIST"],
+        huge_value_percentage=0,
+        huge_value_size=0,
+    )
     await s.run(async_client)
     await check_list()