1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-14 11:58:02 +00:00

fix(server): Fix a bug with brpoplpush (#677)

fix(server): Fix a bug when an expired transaction stays in watched queue.

Now we remove the transaction from the watched queues in a consistent manner based on the
keys it was assigned to watch.

Signed-off-by: Roman Gershman <roman@dragonflydb.io>

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
This commit is contained in:
Roman Gershman 2023-01-14 20:18:28 +02:00 committed by GitHub
parent dd218fa037
commit 613e3b8741
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 79 additions and 80 deletions

View file

@ -199,8 +199,6 @@ void BlockingController::RemoveWatched(ArgSlice keys, Transaction* trans) {
if (wt.queue_map.empty()) {
watched_dbs_.erase(dbit);
}
awakened_transactions_.erase(trans);
}
// Called from commands like lpush.

View file

@ -48,6 +48,10 @@ class BlockingController {
size_t NumWatched(DbIndex db_indx) const;
std::vector<std::string> GetWatchedKeys(DbIndex db_indx) const;
void RemoveAwaked(Transaction* trans) {
awakened_transactions_.erase(trans);
}
private:
struct WatchQueue;
struct DbWatchTable;
@ -70,7 +74,5 @@ class BlockingController {
// There can be multiple transactions like this because a transaction
// could awaken arbitrary number of keys.
absl::flat_hash_set<Transaction*> awakened_transactions_;
// absl::btree_multimap<TxId, Transaction*> waiting_convergence_;
};
} // namespace dfly

View file

@ -81,8 +81,7 @@ TEST_F(BlockingControllerTest, Timeout) {
time_point tp = steady_clock::now() + chrono::milliseconds(10);
trans_->Schedule();
auto keys = trans_->ShardArgsInShard(0);
auto cb = [&](Transaction* t, EngineShard* shard) { return t->WatchInShard(keys, shard); };
auto cb = [&](Transaction* t, EngineShard* shard) { return trans_->ShardArgsInShard(0); };
bool res = trans_->WaitOnWatch(tp, cb);

View file

@ -292,8 +292,11 @@ void EngineShard::PollExecution(const char* context, Transaction* trans) {
CHECK_EQ(committed_txid_, trans->notify_txid());
bool keep = trans->RunInShard(this);
if (keep)
if (keep) {
return;
} else {
blocking_controller_->RemoveAwaked(trans);
}
}
if (continuation_trans_) {

View file

@ -202,6 +202,7 @@ class EngineShard {
uint32_t periodic_task_ = 0;
uint32_t defrag_task_ = 0;
DefragTaskState defrag_state_;
std::unique_ptr<TieredStorage> tiered_storage_;
std::unique_ptr<BlockingController> blocking_controller_;

View file

@ -240,13 +240,12 @@ OpStatus BPopper::Run(Transaction* t, unsigned msec) {
}
// Block
auto cb = [&](Transaction* t, EngineShard* shard) {
auto keys = t->ShardArgsInShard(shard->shard_id());
return t->WatchInShard(keys, shard);
auto wcb = [&](Transaction* t, EngineShard* shard) {
return t->ShardArgsInShard(shard->shard_id());
};
++stats->num_blocked_clients;
bool wait_succeeded = t->WaitOnWatch(tp, std::move(cb));
bool wait_succeeded = t->WaitOnWatch(tp, std::move(wcb));
--stats->num_blocked_clients;
if (!wait_succeeded)
@ -884,10 +883,8 @@ OpResult<string> BPopPusher::RunSingle(Transaction* t, time_point tp) {
}
auto* stats = ServerState::tl_connection_stats();
auto wcb = [&](Transaction* t, EngineShard* shard) {
ArgSlice keys{&this->pop_key_, 1};
return t->WatchInShard(keys, shard);
};
auto wcb = [&](Transaction* t, EngineShard* shard) { return ArgSlice{&this->pop_key_, 1}; };
// Block
++stats->num_blocked_clients;
@ -919,10 +916,7 @@ OpResult<string> BPopPusher::RunPair(Transaction* t, time_point tp) {
// Therefore we follow the regular flow of watching the key but for the destination shard it
// will never be triggerred.
// This allows us to run Transaction::Execute on watched transactions in both shards.
auto wcb = [&](Transaction* t, EngineShard* shard) {
ArgSlice keys{&this->pop_key_, 1};
return t->WatchInShard(keys, shard);
};
auto wcb = [&](Transaction* t, EngineShard* shard) { return ArgSlice{&this->pop_key_, 1}; };
++stats->num_blocked_clients;

View file

@ -9,6 +9,7 @@
#include "base/gtest.h"
#include "base/logging.h"
#include "facade/facade_test.h"
#include "server/blocking_controller.h"
#include "server/command_registry.h"
#include "server/conn_context.h"
#include "server/engine_shard_set.h"
@ -28,6 +29,17 @@ class ListFamilyTest : public BaseFamilyTest {
ListFamilyTest() {
num_threads_ = 4;
}
unsigned NumWatched() {
atomic_uint32_t sum{0};
shard_set->RunBriefInParallel([&](EngineShard* es) {
auto* bc = es->blocking_controller();
if (bc)
sum.fetch_add(bc->NumWatched(0), memory_order_relaxed);
});
return sum.load();
}
};
const char kKey1[] = "x";
@ -114,6 +126,7 @@ TEST_F(ListFamilyTest, BLPopBlocking) {
ASSERT_THAT(resp0, ArrLen(2));
EXPECT_THAT(resp0.GetVec(), ElementsAre("x", "1"));
ASSERT_FALSE(IsLocked(0, "x"));
ASSERT_EQ(0, NumWatched());
}
TEST_F(ListFamilyTest, BLPopMultiple) {
@ -137,7 +150,7 @@ TEST_F(ListFamilyTest, BLPopMultiple) {
EXPECT_THAT(resp0.GetVec(), ElementsAre(kKey1, "3"));
ASSERT_FALSE(IsLocked(0, kKey1));
ASSERT_FALSE(IsLocked(0, kKey2));
// ess_->RunBriefInParallel([](EngineShard* es) { ASSERT_FALSE(es->HasAwakedTransaction()); });
ASSERT_EQ(0, NumWatched());
}
TEST_F(ListFamilyTest, BLPopTimeout) {
@ -155,6 +168,7 @@ TEST_F(ListFamilyTest, BLPopTimeout) {
EXPECT_THAT(resp, ArgType(RespExpr::NIL_ARRAY));
ASSERT_FALSE(service_->IsLocked(0, kKey1));
ASSERT_EQ(0, NumWatched());
}
TEST_F(ListFamilyTest, BLPopTimeout2) {
@ -171,6 +185,7 @@ TEST_F(ListFamilyTest, BLPopTimeout2) {
Run({"DEL", "blist2"});
Run({"RPUSH", "blist2", "d"});
Run({"BLPOP", "blist1", "blist2", "1"});
ASSERT_EQ(0, NumWatched());
}
TEST_F(ListFamilyTest, BLPopMultiPush) {
@ -210,6 +225,7 @@ TEST_F(ListFamilyTest, BLPopMultiPush) {
ASSERT_THAT(blpop_resp, ArrLen(2));
auto resp_arr = blpop_resp.GetVec();
EXPECT_THAT(resp_arr, ElementsAre(kKey1, "A"));
ASSERT_EQ(0, NumWatched());
}
TEST_F(ListFamilyTest, BLPopSerialize) {
@ -638,10 +654,10 @@ TEST_F(ListFamilyTest, TwoQueueBug451) {
auto push_fiber = [&]() {
auto id = "t-" + std::to_string(it_cnt.fetch_add(1));
for (int i = 0; i < 1000; i++) {
for (int i = 0; i < 300; i++) {
Run(id, {"rpush", "a", "DATA"});
}
fibers_ext::SleepFor(100ms);
fibers_ext::SleepFor(50ms);
running = false;
};
@ -662,6 +678,7 @@ TEST_F(ListFamilyTest, TwoQueueBug451) {
TEST_F(ListFamilyTest, BRPopLPushSingleShard) {
EXPECT_THAT(Run({"brpoplpush", "x", "y", "0.05"}), ArgType(RespExpr::NIL));
ASSERT_EQ(0, NumWatched());
EXPECT_THAT(Run({"lpush", "x", "val1"}), IntArg(1));
EXPECT_EQ(Run({"brpoplpush", "x", "y", "0.01"}), "val1");
@ -682,6 +699,7 @@ TEST_F(ListFamilyTest, BRPopLPushSingleShard) {
EXPECT_THAT(resp, ArgType(RespExpr::NIL));
ASSERT_FALSE(IsLocked(0, "x"));
ASSERT_FALSE(IsLocked(0, "y"));
ASSERT_EQ(0, NumWatched());
}
TEST_F(ListFamilyTest, BRPopLPushSingleShardBlocking) {
@ -699,12 +717,15 @@ TEST_F(ListFamilyTest, BRPopLPushSingleShardBlocking) {
ASSERT_EQ(resp, "1");
ASSERT_FALSE(IsLocked(0, "x"));
ASSERT_FALSE(IsLocked(0, "y"));
ASSERT_EQ(0, NumWatched());
}
TEST_F(ListFamilyTest, BRPopLPushTwoShards) {
RespExpr resp;
EXPECT_THAT(Run({"brpoplpush", "x", "z", "0.05"}), ArgType(RespExpr::NIL));
ASSERT_EQ(0, NumWatched());
Run({"lpush", "x", "val"});
EXPECT_EQ(Run({"brpoplpush", "x", "z", "0"}), "val");
resp = Run({"lrange", "z", "0", "-1"});
@ -732,6 +753,7 @@ TEST_F(ListFamilyTest, BRPopLPushTwoShards) {
ASSERT_THAT(resp.GetVec(), ElementsAre("val1", "val2"));
ASSERT_FALSE(IsLocked(0, "x"));
ASSERT_FALSE(IsLocked(0, "z"));
ASSERT_EQ(0, NumWatched());
// TODO: there is a bug here.
// we do not wake the dest shard, when source is awaked which prevents
// the atomicity and causes the first bug as well.

View file

@ -402,10 +402,6 @@ bool Transaction::RunInShard(EngineShard* shard) {
if (was_suspended || !become_suspended) {
shard->db_slice().Release(mode, largs);
sd.local_mask &= ~KEYLOCK_ACQUIRED;
if (was_suspended || (sd.local_mask & AWAKED_Q)) {
shard->blocking_controller()->RemoveWatched(ShardArgsInShard(shard->shard_id()), this);
}
}
sd.local_mask &= ~OUT_OF_ORDER;
}
@ -813,13 +809,18 @@ void Transaction::RunQuickie(EngineShard* shard) {
// runs in coordinator thread.
// Marks the transaction as expired and removes it from the waiting queue.
void Transaction::ExpireBlocking() {
DVLOG(1) << "ExpireBlocking " << DebugId();
void Transaction::UnwatchBlocking(bool should_expire, WaitKeysPovider wcb) {
DVLOG(1) << "UnwatchBlocking " << DebugId();
DCHECK(!IsGlobal());
run_count_.store(unique_shard_cnt_, memory_order_release);
auto expire_cb = [this] { ExpireShardCb(EngineShard::tlocal()); };
auto expire_cb = [&] {
EngineShard* es = EngineShard::tlocal();
ArgSlice wkeys = wcb(this, es);
UnwatchShardCb(wkeys, should_expire, es);
};
if (unique_shard_cnt_ == 1) {
DCHECK_LT(unique_shard_id_, shard_set->size());
@ -837,7 +838,7 @@ void Transaction::ExpireBlocking() {
// Wait for all callbacks to conclude.
WaitForShardCallbacks();
DVLOG(1) << "ExpireBlocking finished " << DebugId();
DVLOG(1) << "UnwatchBlocking finished " << DebugId();
}
const char* Transaction::Name() const {
@ -1009,12 +1010,17 @@ size_t Transaction::ReverseArgIndex(ShardId shard_id, size_t arg_index) const {
return reverse_index_[sd.arg_start + arg_index];
}
bool Transaction::WaitOnWatch(const time_point& tp, RunnableType cb) {
bool Transaction::WaitOnWatch(const time_point& tp, WaitKeysPovider wkeys_provider) {
// Assumes that transaction is pending and scheduled. TODO: To verify it with state machine.
VLOG(2) << "WaitOnWatch Start use_count(" << use_count() << ")";
using namespace chrono;
Execute(cb, true);
auto cb = [&](Transaction* t, EngineShard* shard) {
auto keys = wkeys_provider(t, shard);
return t->WatchInShard(keys, shard);
};
Execute(move(cb), true);
coordinator_state_ |= COORD_BLOCKED;
@ -1038,40 +1044,11 @@ bool Transaction::WaitOnWatch(const time_point& tp, RunnableType cb) {
DVLOG(1) << "WaitOnWatch await_until " << int(status);
}
if ((coordinator_state_ & COORD_CANCELLED) || status == cv_status::timeout) {
ExpireBlocking();
coordinator_state_ &= ~COORD_BLOCKED;
return false;
}
#if 0
// We were notified by a shard, so lets make sure that our notifications converged to a stable
// form.
if (unique_shard_cnt_ > 1) {
run_count_.store(unique_shard_cnt_, memory_order_release);
auto converge_cb = [this] {
this->CheckForConvergence(EngineShard::tlocal());
};
for (ShardId i = 0; i < shard_data_.size(); ++i) {
auto& sd = shard_data_[i];
DCHECK_EQ(0, sd.local_mask & ARMED);
if (sd.arg_count == 0)
continue;
shard_set->Add(i, converge_cb);
}
// Wait for all callbacks to conclude.
WaitForShardCallbacks();
DVLOG(1) << "Convergence finished " << DebugId();
}
#endif
// Lift blocking mask.
bool is_expired = (coordinator_state_ & COORD_CANCELLED) || status == cv_status::timeout;
UnwatchBlocking(is_expired, wkeys_provider);
coordinator_state_ &= ~COORD_BLOCKED;
return true;
return !is_expired;
}
// Runs only in the shard thread.
@ -1091,7 +1068,8 @@ OpStatus Transaction::WatchInShard(ArgSlice keys, EngineShard* shard) {
return OpStatus::OK;
}
void Transaction::ExpireShardCb(EngineShard* shard) {
void Transaction::UnwatchShardCb(ArgSlice wkeys, bool should_expire, EngineShard* shard) {
if (should_expire) {
auto lock_args = GetLockArgs(shard->shard_id());
shard->db_slice().Release(Mode(), lock_args);
@ -1099,14 +1077,15 @@ void Transaction::ExpireShardCb(EngineShard* shard) {
auto& sd = shard_data_[sd_idx];
sd.local_mask |= EXPIRED_Q;
sd.local_mask &= ~KEYLOCK_ACQUIRED;
}
shard->blocking_controller()->RemoveWatched(ShardArgsInShard(shard->shard_id()), this);
shard->blocking_controller()->RemoveWatched(wkeys, this);
// Need to see why I decided to call this.
// My guess - probably to trigger the run of stalled transactions in case
// this shard concurrently awoke this transaction and stalled the processing
// of TxQueue.
shard->PollExecution("expirecb", nullptr);
shard->PollExecution("unwatchcb", nullptr);
CHECK_GE(DecreaseRunCnt(), 1u);
}

View file

@ -161,7 +161,8 @@ class Transaction {
// or b) tp is reached. If tp is time_point::max() then waits indefinitely.
// Expects that the transaction had been scheduled before, and uses Execute(.., true) to register.
// Returns false if timeout occurred, true if was notified by one of the keys.
bool WaitOnWatch(const time_point& tp, RunnableType cb);
using WaitKeysPovider = std::function<ArgSlice(Transaction*, EngineShard* shard)>;
bool WaitOnWatch(const time_point& tp, WaitKeysPovider cb);
// Returns true if transaction is awaked, false if it's timed-out and can be removed from the
// blocking queue. NotifySuspended may be called from (multiple) shard threads and
@ -191,9 +192,6 @@ class Transaction {
return db_index_;
}
// Adds itself to watched queue in the shard. Must run in that shard thread.
OpStatus WatchInShard(ArgSlice keys, EngineShard* shard);
private:
struct LockCnt {
unsigned cnt[2] = {0, 0};
@ -208,7 +206,7 @@ class Transaction {
void ScheduleInternal();
void LockMulti();
void ExpireBlocking();
void UnwatchBlocking(bool should_expire, WaitKeysPovider wcb);
void ExecuteAsync();
// Optimized version of RunInShard for single shard uncontended cases.
@ -226,9 +224,12 @@ class Transaction {
// Returns true if we need to follow up with PollExecution on this shard.
bool CancelShardCb(EngineShard* shard);
void ExpireShardCb(EngineShard* shard);
void UnwatchShardCb(ArgSlice wkeys, bool should_expire, EngineShard* shard);
void UnlockMultiShardCb(const std::vector<KeyList>& sharded_keys, EngineShard* shard);
// Adds itself to watched queue in the shard. Must run in that shard thread.
OpStatus WatchInShard(ArgSlice keys, EngineShard* shard);
void WaitForShardCallbacks() {
run_ec_.await([this] { return 0 == run_count_.load(std::memory_order_relaxed); });