Mirror of https://github.com/dragonflydb/dragonfly.git (synced 2024-12-14 11:58:02 +00:00)
fix(replication): fix cancel replication race (#2196)
The bug: one connection calls replica Start while a second connection calls replica Stop. In this flow Stop first resets the state mask (state_mask_.store(0)); Start then sets the state mask (state_mask_.store(R_ENABLED)), continues with the greeting and creates the main replication fiber; only afterwards does Stop run cntx_.Cancel(), and that cancellation is later reset inside the main replication fiber. As a result the main replication fiber is never cancelled, and the connection calling Stop deadlocks waiting to join it.

The fix: run cntx_.Cancel() and state_mask_.store(0) on the replica's own thread (via its proactor).

Signed-off-by: adi_holden <adi@dragonflydb.io>
This commit is contained in:
parent
03a6b508e8
commit
c10dac4db2
2 changed files with 7 additions and 2 deletions
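The essence of the change is to funnel teardown onto the thread that owns the replica instead of mutating its state from the caller's connection thread. Below is a minimal sketch of that pattern using helio's util::fb2::ProactorBase; only ProactorBase::me() and Await() come from the diff below, while the class name OwnedByProactor and the include path are illustrative assumptions, not Dragonfly code.

```cpp
#include <atomic>

#include "util/fibers/proactor_base.h"  // helio; exact header path is an assumption

// Hypothetical class: it records the proactor of the thread that constructed
// it (as Replica's new constructor does) and later hops back to that thread
// with Await(), so shutdown is serialized with any Start() work running there.
class OwnedByProactor {
 public:
  // Must be constructed from inside a proactor thread, as Replica is.
  OwnedByProactor() : proactor_(util::fb2::ProactorBase::me()) {}

  void Stop() {
    // Await() runs the lambda on the owning proactor thread and blocks the
    // caller until it finishes, so the cancel + state reset cannot interleave
    // with a concurrent Start() executing on that thread.
    proactor_->Await([this] {
      cancelled_.store(true);  // stands in for cntx_.Cancel()
      enabled_.store(false);   // stands in for state_mask_.store(0)
    });
  }

 private:
  util::fb2::ProactorBase* proactor_;
  std::atomic_bool cancelled_{false};
  std::atomic_bool enabled_{false};
};
```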
@@ -67,6 +67,7 @@ vector<vector<unsigned>> Partition(unsigned num_flows) {
 
 Replica::Replica(string host, uint16_t port, Service* se, std::string_view id)
     : ProtocolClient(std::move(host), port), service_(*se), id_{id} {
+  proactor_ = ProactorBase::me();
 }
 
 Replica::~Replica() {
@@ -133,8 +134,11 @@ void Replica::EnableReplication(ConnectionContext* cntx) {
 
 void Replica::Stop() {
   VLOG(1) << "Stopping replication";
   // Stops the loop in MainReplicationFb.
-  state_mask_.store(0);  // Specifically ~R_ENABLED.
-  cntx_.Cancel();        // Context is fully responsible for cleanup.
+
+  proactor_->Await([this] {
+    cntx_.Cancel();        // Context is fully responsible for cleanup.
+    state_mask_.store(0);  // Specifically ~R_ENABLED.
+  });
 
   waker_.notifyAll();
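For readers without the helio sources at hand, here is a self-contained model (plain C++17, all names hypothetical) of what an Await()-style hop provides: one owner thread consumes closures in order, and RunAndWait() blocks the caller until its closure has run there. Because every closure runs on that single thread, a Stop() expressed this way can no longer interleave with a Start() executing on the same thread, which is the property the hunk above relies on.

```cpp
#include <condition_variable>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <thread>

class OwnerThread {
 public:
  OwnerThread() : worker_([this] { Loop(); }) {}

  ~OwnerThread() {
    // Stop the loop from the owner thread itself, then join it.
    RunAndWait([this] { stop_ = true; });
    worker_.join();
  }

  // Run `fn` on the owner thread and block until it has completed, similar in
  // spirit to ProactorBase::Await().
  void RunAndWait(std::function<void()> fn) {
    auto done = std::make_shared<std::promise<void>>();
    std::future<void> fut = done->get_future();
    {
      std::lock_guard<std::mutex> lk(mu_);
      tasks_.push([fn = std::move(fn), done] {
        fn();
        done->set_value();
      });
    }
    cv_.notify_one();
    fut.get();  // Wait for the owner thread to execute fn.
  }

 private:
  void Loop() {
    while (true) {
      std::function<void()> task;
      {
        std::unique_lock<std::mutex> lk(mu_);
        cv_.wait(lk, [this] { return !tasks_.empty(); });
        task = std::move(tasks_.front());
        tasks_.pop();
      }
      task();
      if (stop_) break;  // Set by the task posted from the destructor.
    }
  }

  std::mutex mu_;
  std::condition_variable cv_;
  std::queue<std::function<void()>> tasks_;
  bool stop_ = false;  // Touched only on the owner thread.
  std::thread worker_;
};
```

A hypothetical usage: construct the OwnerThread next to the state it guards and express shutdown as owner.RunAndWait([&] { cancelled = true; enabled = false; }); — the two writes then happen atomically with respect to anything else scheduled on that thread.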
@@ -156,6 +156,7 @@ class Replica : ProtocolClient {
   std::string GetSyncId() const;
 
  private:
+  util::fb2::ProactorBase* proactor_ = nullptr;
   Service& service_;
   MasterContext master_context_;