1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-14 11:58:02 +00:00

fix(lua): Fix a deadlock happenning when calling a lua script (#726)

The scenario is described in a unit test that reproduces the issue with high chance.
Also added dragonfly_test in repeat=100 mode to CI.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2023-01-25 10:16:52 +02:00 committed by GitHub
parent 59bfecad69
commit 90eb1d81b7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 5 deletions

View file

@ -78,7 +78,7 @@ jobs:
echo Run ctest -V -L DFLY
#GLOG_logtostderr=1 GLOG_vmodule=transaction=1,engine_shard_set=1
GLOG_logtostderr=1 GLOG_vmodule=rdb_load=1,rdb_save=1,snapshot=1 ctest -V -L DFLY
./dragonfly_test --mem_defrag_threshold=0.05 # trying to catch issue with defrag
./dragonfly_test --gtest_repeat=100
# GLOG_logtostderr=1 GLOG_vmodule=transaction=1,engine_shard_set=1 CTEST_OUTPUT_ON_FAILURE=1 ninja server/test
lint-test-chart:
runs-on: ubuntu-latest

View file

@ -471,6 +471,33 @@ TEST_F(DflyEngineTest, EvalBug713) {
fb0.Join();
}
// Tests deadlock that happenned due to a fact that trans->Schedule was called
// before interpreter->Lock().
//
// The problematic scenario:
// 1. transaction 1 schedules itself and blocks on an interpreter lock
// 2. transaction 2 schedules itself, but meanwhile an interpreter unlocks itself and
// transaction 2 grabs the lock but can not progress due to transaction 1 already
// scheduled before.
TEST_F(DflyEngineTest, EvalBug713b) {
const char* script = "return redis.call('get', KEYS[1])";
const uint32_t kNumFibers = 20;
fibers_ext::Fiber fibers[kNumFibers];
for (unsigned j = 0; j < kNumFibers; ++j) {
fibers[j] = pp_->at(1)->LaunchFiber([=, this] {
for (unsigned i = 0; i < 50; ++i) {
Run(StrCat("fb", j), {"eval", script, "3", kKeySid0, kKeySid1, kKeySid2});
}
});
}
for (unsigned j = 0; j < kNumFibers; ++j) {
fibers[j].Join();
}
}
TEST_F(DflyEngineTest, EvalSha) {
auto resp = Run({"script", "load", "return 5"});
EXPECT_THAT(resp, ArgType(RespExpr::STRING));

View file

@ -906,6 +906,8 @@ void Service::Unwatch(CmdArgList args, ConnectionContext* cntx) {
void Service::CallFromScript(CmdArgList args, ObjectExplorer* reply, ConnectionContext* cntx) {
DCHECK(cntx->transaction);
DVLOG(1) << "CallFromScript " << cntx->transaction->DebugId() << " " << ArgS(args, 0);
InterpreterReplier replier(reply);
facade::SinkReplyBuilder* orig = cntx->Inject(&replier);
@ -1013,11 +1015,11 @@ void Service::EvalInternal(const EvalArgs& eval_args, Interpreter* interpreter,
}
DCHECK(cntx->transaction);
auto lk = interpreter->Lock();
if (!eval_args.keys.empty())
cntx->transaction->Schedule();
auto lk = interpreter->Lock();
interpreter->SetGlobalArray("KEYS", eval_args.keys);
interpreter->SetGlobalArray("ARGV", eval_args.args);
interpreter->SetRedisFunc(
@ -1039,12 +1041,12 @@ void Service::EvalInternal(const EvalArgs& eval_args, Interpreter* interpreter,
CHECK(result == Interpreter::RUN_OK);
EvalSerializer ser{static_cast<RedisReplyBuilder*>(cntx->reply_builder())};
if (!interpreter->IsResultSafe()) {
(*cntx)->SendError("reached lua stack limit");
} else {
interpreter->SerializeResult(&ser);
}
interpreter->ResetStack();
}

View file

@ -904,7 +904,7 @@ pair<bool, bool> Transaction::ScheduleInShard(EngineShard* shard) {
// All transactions in the queue must acquire the intent lock.
lock_granted = shard->db_slice().Acquire(mode, lock_args) && shard_unlocked;
sd.local_mask |= KEYLOCK_ACQUIRED;
DVLOG(1) << "Lock granted " << lock_granted << " for trans " << DebugId();
DVLOG(2) << "Lock granted " << lock_granted << " for trans " << DebugId();
}
if (!txq->Empty()) {