
feat: add support for big values in SeederV2 (#4222)

* add support for big values in SeederV2

---------

Signed-off-by: kostas <kostas@dragonflydb.io>
Authored by Kostas Kyrimis <kostas@dragonflydb.io> on 2024-12-05 09:47:41 +01:00, committed by GitHub
parent ad73e18f6d
commit 7ccad66fb1
4 changed files with 134 additions and 29 deletions


@@ -19,6 +19,7 @@ class SeederBase:
     UID_COUNTER = 1  # multiple generators should not conflict on keys
     CACHED_SCRIPTS = {}
     DEFAULT_TYPES = ["STRING", "LIST", "SET", "HASH", "ZSET", "JSON"]
+    BIG_VALUE_TYPES = ["LIST", "SET", "HASH", "ZSET"]

     def __init__(self, types: typing.Optional[typing.List[str]] = None):
         self.uid = SeederBase.UID_COUNTER
@@ -137,6 +138,10 @@ class Seeder(SeederBase):
         data_size=100,
         collection_size=None,
         types: typing.Optional[typing.List[str]] = None,
+        huge_value_percentage=1,
+        huge_value_size=1024,
+        # 1 huge entry per container/key by default
+        huge_value_csize=1,
     ):
         SeederBase.__init__(self, types)
         self.key_target = key_target
@@ -146,6 +151,10 @@ class Seeder(SeederBase):
         else:
             self.collection_size = collection_size
+        self.huge_value_percentage = huge_value_percentage
+        self.huge_value_size = huge_value_size
+        self.huge_value_csize = huge_value_csize

         self.units = [
             Seeder.Unit(
                 prefix=f"k-s{self.uid}u{i}-",
@@ -166,6 +175,9 @@ class Seeder(SeederBase):
             target_deviation if target_deviation is not None else -1,
             self.data_size,
             self.collection_size,
+            self.huge_value_percentage,
+            self.huge_value_size,
+            self.huge_value_csize,
         ]

         sha = await client.script_load(Seeder._load_script("generate"))
@@ -196,8 +208,14 @@ class Seeder(SeederBase):
             unit.stop_key if using_stopkey else "",
         ] + args

-        unit.counter = await client.evalsha(sha, 0, *args)
+        result = await client.evalsha(sha, 0, *args)
+        result = result.split()
+        unit.counter = int(result[0])
+        huge_keys = int(result[1])
+        huge_entries = int(result[2])

-        logging.debug(
-            f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}"
-        )
+        msg = f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}"
+        if huge_keys > 0:
+            msg = f"{msg}. Total huge keys added {huge_keys} with {args[11]} elements each. Total extra modified huge entries {huge_entries}."
+        logging.debug(msg)
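
The generate script now replies with three space-separated counters instead of a bare integer, and the Python side splits and converts them as above. A minimal standalone sketch of that parsing contract (the raw string below is a made-up reply, not output from a real run):

    # Hypothetical reply from the generate script: "<key_counter> <huge_keys> <huge_entries>"
    raw = "1000 12 15"

    counter, huge_keys, huge_entries = (int(part) for part in raw.split())
    assert (counter, huge_keys, huge_entries) == (1000, 12, 15)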


@@ -18,21 +18,48 @@ local total_ops = tonumber(ARGV[6])
 local min_dev = tonumber(ARGV[7])
 local data_size = tonumber(ARGV[8])
 local collection_size = tonumber(ARGV[9])
+-- Probability of each key in key_target being a big value
+local huge_value_percentage = tonumber(ARGV[10])
+local huge_value_size = tonumber(ARGV[11])
+local huge_value_csize = tonumber(ARGV[12])

 -- collect all keys belonging to this script
 -- assumes exclusive ownership
 local keys = LU_collect_keys(prefix, type)

-LG_funcs.init(data_size, collection_size)
+LG_funcs.init(data_size, collection_size, huge_value_percentage, huge_value_size, huge_value_csize)
 local addfunc = LG_funcs['add_' .. string.lower(type)]
 local modfunc = LG_funcs['mod_' .. string.lower(type)]
+local huge_entries = LG_funcs["get_huge_entries"]

+local huge_keys = 0

+local function huge_entry()
+    local ratio = LG_funcs.huge_value_percentage / 100
+    -- [0, 1]
+    local rand = math.random()
+    local huge_entry = (ratio > rand)
+    return huge_entry
+end

 local function action_add()
     local key = prefix .. tostring(key_counter)
-    key_counter = key_counter + 1
+    local op_type = string.lower(type)
+    local is_huge = false
+    -- `string` and `json` huge entries are not supported, so
+    -- we don't roll the dice to decide whether they are huge
+    if op_type ~= "string" and op_type ~= "json" then
+        is_huge = huge_entry()
+    end

+    key_counter = key_counter + 1
+    if is_huge then
+        huge_keys = huge_keys + 1
+    end

-    addfunc(key, keys)
     table.insert(keys, key)
+    keys[key] = is_huge
+    addfunc(key, keys)
 end

 local function action_mod()
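
huge_entry() above is a plain Bernoulli trial over huge_value_percentage. The same roll in standalone Python (names mirror the script; the 1% default comes from the new Seeder signature):

    import random

    def huge_entry(huge_value_percentage: float = 1) -> bool:
        # A key is picked as huge with probability huge_value_percentage / 100,
        # mirroring the script's `ratio > rand` comparison.
        return huge_value_percentage / 100 > random.random()

    # With the 1% default, roughly 1 in 100 added keys becomes huge.
    rolls = sum(huge_entry() for _ in range(100_000))
    print(f"huge rolls out of 100k: {rolls}")  # ~1000 in expectation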
@@ -84,7 +111,8 @@ while true do
     -- update probability only every 10 iterations
     if counter % 10 == 0 then
         -- calculate intensity (not normalized probabilities)
-        -- please see attached plots in PR to undertand convergence
+        -- please see attached plots in PR to understand convergence
+        -- https://github.com/dragonflydb/dragonfly/pull/2556
         -- the add intensity is monotonically decreasing with keycount growing,
         -- the delete intensity is monotonically increasing with keycount growing,
@@ -121,4 +149,4 @@ if stop_key ~= '' then
     redis.call('DEL', stop_key)
 end

-return key_counter
+return tostring(key_counter) .. " " .. tostring(huge_keys) .. " " .. tostring(huge_entries())


@@ -1,9 +1,36 @@
 local LG_funcs = {}

-function LG_funcs.init(dsize, csize)
+function LG_funcs.init(dsize, csize, large_val_perc, large_val_sz, huge_value_csize)
     LG_funcs.dsize = dsize
     LG_funcs.csize = csize
     LG_funcs.esize = math.ceil(dsize / csize)
+    LG_funcs.huge_value_percentage = large_val_perc
+    LG_funcs.huge_value_size = large_val_sz
+    LG_funcs.huge_value_csize = huge_value_csize
 end

+local huge_entries = 0

+local function randstr(huge_entry)
+    local str
+    if huge_entry then
+        str = dragonfly.randstr(LG_funcs.huge_value_size)
+        huge_entries = huge_entries + 1
+    else
+        str = dragonfly.randstr(LG_funcs.esize)
+    end
+    return str
+end

+local function randstr_sequence(huge_entry)
+    local strs
+    if huge_entry then
+        strs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.huge_value_csize)
+        huge_entries = huge_entries + 1
+    else
+        strs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
+    end
+    return strs
+end

 -- strings
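
For reference, LG_funcs.esize fixes the per-element size so that csize regular elements sum to roughly dsize; huge entries bypass it and use huge_value_size directly. The arithmetic, checked in standalone Python:

    import math

    data_size, collection_size = 10_000, 100
    esize = math.ceil(data_size / collection_size)  # as in LG_funcs.init
    assert esize == 100
    assert esize * collection_size >= data_size  # a full container reaches at least data_size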
@@ -27,23 +54,24 @@ end
 -- lists
 -- store list of random blobs of default container/element sizes

-function LG_funcs.add_list(key)
-    local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
-    redis.apcall('LPUSH', key, unpack(elements))
+function LG_funcs.add_list(key, keys)
+    local is_huge = keys[key]
+    redis.apcall('LPUSH', key, unpack(randstr_sequence(is_huge)))
 end

-function LG_funcs.mod_list(key)
+function LG_funcs.mod_list(key, keys)
     -- equally likely pops and pushes, we rely on the list size being large enough
     -- to "highly likely" not get emptied out by consecutive pops
+    local is_huge = keys[key]
     local action = math.random(1, 4)
     if action == 1 then
         redis.apcall('RPOP', key)
     elseif action == 2 then
         redis.apcall('LPOP', key)
     elseif action == 3 then
-        redis.apcall('LPUSH', key, dragonfly.randstr(LG_funcs.esize))
+        redis.apcall('LPUSH', key, randstr(is_huge))
     else
-        redis.apcall('RPUSH', key, dragonfly.randstr(LG_funcs.esize))
+        redis.apcall('RPUSH', key, randstr(is_huge))
     end
 end
@@ -62,17 +90,18 @@ function LG_funcs.add_set(key, keys)
         end
         redis.apcall('SDIFFSTORE', key, keys[i1], keys[i2])
     else
-        local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
-        redis.apcall('SADD', key, unpack(elements))
+        local is_huge = keys[key]
+        redis.apcall('SADD', key, unpack(randstr_sequence(is_huge)))
     end
 end

-function LG_funcs.mod_set(key)
+function LG_funcs.mod_set(key, keys)
     -- equally likely pops and additions
     if math.random() < 0.5 then
         redis.apcall('SPOP', key)
     else
-        redis.apcall('SADD', key, dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]
+        redis.apcall('SADD', key, randstr(is_huge))
     end
 end
@@ -81,8 +110,16 @@ end
 -- store {to_string(i): value for i in [1, csize]},
 -- where `value` is a random string for even indices and a number for odd indices

-function LG_funcs.add_hash(key)
-    local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
+function LG_funcs.add_hash(key, keys)
+    local blobs
+    local is_huge = keys[key]
+    if is_huge then
+        blobs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize / 2)
+        huge_entries = huge_entries + 1
+    else
+        blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
+    end

     local htable = {}
     for i = 1, LG_funcs.csize, 2 do
         htable[i * 2 - 1] = tostring(i)
@@ -95,12 +132,13 @@ function LG_funcs.add_hash(key)
     redis.apcall('HSET', key, unpack(htable))
 end

-function LG_funcs.mod_hash(key)
+function LG_funcs.mod_hash(key, keys)
     local idx = math.random(LG_funcs.csize)
     if idx % 2 == 1 then
         redis.apcall('HINCRBY', key, tostring(idx), 1)
     else
-        redis.apcall('HSET', key, tostring(idx), dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]
+        redis.apcall('HSET', key, tostring(idx), randstr(is_huge))
     end
 end
@@ -108,19 +146,28 @@ end
 function LG_funcs.add_zset(key, keys)
     -- TODO: We don't support ZDIFFSTORE
-    local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
+    local is_huge = keys[key]
+    local blobs = randstr_sequence(is_huge)

     local ztable = {}
-    for i = 1, LG_funcs.csize do
+
+    local limit = LG_funcs.csize
+    if is_huge then
+        limit = LG_funcs.huge_value_csize
+    end
+
+    for i = 1, limit do
         ztable[i * 2 - 1] = tostring(i)
         ztable[i * 2] = blobs[i]
     end
     redis.apcall('ZADD', key, unpack(ztable))
 end

-function LG_funcs.mod_zset(key, dbsize)
+function LG_funcs.mod_zset(key, keys)
     local action = math.random(1, 4)
     if action <= 2 then
-        redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]
+        redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), randstr(is_huge))
     elseif action == 3 then
         redis.apcall('ZPOPMAX', key)
     else
@@ -153,3 +200,7 @@ function LG_funcs.mod_json(key, dbsize)
         redis.apcall('JSON.NUMINCRBY', key, '$.counters[' .. math.random(LG_funcs.csize) .. ']', 1)
     end
 end

+function LG_funcs.get_huge_entries()
+    return huge_entries
+end
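
The ztable loop in add_zset builds the flat score/member pair list that a variadic ZADD expects, capped at huge_value_csize pairs for huge keys. The same interleaving in standalone Python (build_zadd_args is an illustrative helper, not part of the seeder):

    def build_zadd_args(blobs: list[str]) -> list[str]:
        # ZADD key score1 member1 score2 member2 ...
        args = []
        for i, blob in enumerate(blobs, start=1):
            args.append(str(i))  # score, mirrors ztable[i * 2 - 1] = tostring(i)
            args.append(blob)    # member, mirrors ztable[i * 2] = blobs[i]
        return args

    print(build_zadd_args(["aa", "bb"]))  # ['1', 'aa', '2', 'bb']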


@@ -29,7 +29,15 @@ async def test_static_collection_size(async_client: aioredis.Redis):
     await async_client.flushall()

-    s = Seeder(units=1, key_target=10, data_size=10_000, collection_size=1, types=["LIST"])
+    s = Seeder(
+        units=1,
+        key_target=10,
+        data_size=10_000,
+        collection_size=1,
+        types=["LIST"],
+        huge_value_percentage=0,
+        huge_value_size=0,
+    )
     await s.run(async_client)
     await check_list()
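
For contrast, a run that exercises the new path could look like the sketch below; test_big_values is a hypothetical name and the parameter values are arbitrary picks, with only the keyword arguments coming from this diff:

    async def test_big_values(async_client: aioredis.Redis):  # hypothetical test
        s = Seeder(
            units=1,
            key_target=1_000,
            types=["LIST", "SET", "HASH", "ZSET"],  # matches BIG_VALUE_TYPES
            huge_value_percentage=10,  # ~10% of added keys become huge
            huge_value_size=4096,      # bytes per huge element
            huge_value_csize=1,        # huge elements per container
        )
        await s.run(async_client)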