1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-14 11:58:02 +00:00

feat(replica): atomicity for multi shard commands (#598)

This commit is contained in:
adiholden 2022-12-27 16:13:05 +02:00 committed by GitHub
parent e6721d8160
commit aacab66434
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 67 additions and 34 deletions

View file

@ -9,20 +9,42 @@
namespace dfly {
JournalExecutor::JournalExecutor(Service* service) : service_{service} {
JournalExecutor::JournalExecutor(Service* service)
: service_{service}, conn_context_{&null_sink_, nullptr} {
conn_context_.is_replicating = true;
conn_context_.journal_emulated = true;
}
void JournalExecutor::Execute(std::vector<journal::ParsedEntry>& entries) {
DCHECK_GT(entries.size(), 1U);
conn_context_.conn_state.db_index = entries.front().dbid;
std::string multi_cmd = {"MULTI"};
auto ms = MutableSlice{&multi_cmd[0], multi_cmd.size()};
auto span = CmdArgList{&ms, 1};
service_->DispatchCommand(span, &conn_context_);
for (auto& entry : entries) {
if (entry.payload) {
DCHECK_EQ(entry.dbid, conn_context_.conn_state.db_index);
span = CmdArgList{entry.payload->data(), entry.payload->size()};
service_->DispatchCommand(span, &conn_context_);
}
}
std::string exec_cmd = {"EXEC"};
ms = {&exec_cmd[0], exec_cmd.size()};
span = {&ms, 1};
service_->DispatchCommand(span, &conn_context_);
}
void JournalExecutor::Execute(journal::ParsedEntry& entry) {
if (entry.payload) {
io::NullSink null_sink;
ConnectionContext conn_context{&null_sink, nullptr};
conn_context.is_replicating = true;
conn_context.journal_emulated = true;
conn_context.conn_state.db_index = entry.dbid;
conn_context_.conn_state.db_index = entry.dbid;
if (entry.payload) { // TODO - when this is false?
auto span = CmdArgList{entry.payload->data(), entry.payload->size()};
service_->DispatchCommand(span, &conn_context);
service_->DispatchCommand(span, &conn_context_);
}
}

View file

@ -14,10 +14,13 @@ class Service;
class JournalExecutor {
public:
JournalExecutor(Service* service);
void Execute(std::vector<journal::ParsedEntry>& entries);
void Execute(journal::ParsedEntry& entry);
private:
Service* service_;
ConnectionContext conn_context_;
io::NullSink null_sink_;
};
} // namespace dfly

View file

@ -759,13 +759,13 @@ void Replica::StableSyncDflyFb(Context* cntx) {
cntx->ReportError(res.error(), "Journal format error");
return;
}
ExecuteEntry(&executor, res.value());
ExecuteEntry(&executor, std::move(res.value()));
last_io_time_ = sock_->proactor()->GetMonotonicTimeNs();
}
return;
}
void Replica::ExecuteEntry(JournalExecutor* executor, journal::ParsedEntry& entry) {
void Replica::ExecuteEntry(JournalExecutor* executor, journal::ParsedEntry&& entry) {
if (entry.shard_cnt <= 1) { // not multi shard cmd
executor->Execute(entry);
return;
@ -773,46 +773,53 @@ void Replica::ExecuteEntry(JournalExecutor* executor, journal::ParsedEntry& entr
// Multi shard command flow:
// step 1: Fiber wait until all the fibers that should execute this tranaction got
// to the journal entry of the transaction.
// step 2: execute the command (All fibers)
// step 3: Fiber wait until all fibers finished the execution
// By step 1 we enforce that replica will execute multi shard commands that finished on master
// By step 3 we ensures the correctness of flushall/flushdb commands
// TODO: this implemantaion does not support atomicity in replica
// Although multi shard transaction happen in step 2 very close to each other,
// user can query replica between executions.
// To support atomicity we should have one fiber in step 2 which will excute all the entries of
// the transaction together. In case of global comand such as flushdb the command can be executed
// by only one fiber.
// to the journal entry of the transaction. This step enforces that replica will execute multi
// shard commands that finished on master.
// step 2: Execute the commands from one fiber. This step ensures atomicity of replica.
// step 3: Fiber wait until all fibers finished the execution. This step ensures atomicity of
// operations on replica.
// TODO: support error handler in this flow
// Only the first fiber to reach the transaction will create data for transaction in map
multi_shard_exe_->map_mu.lock();
auto [it, was_insert] = multi_shard_exe_->tx_sync_execution.emplace(entry.txid, entry.shard_cnt);
VLOG(2) << "txid: " << entry.txid << " unique_shard_cnt_: " << entry.shard_cnt
<< " was_insert: " << was_insert;
TxId txid = entry.txid;
// entries_vec will store all entries of trasaction and will be executed by the fiber that
// inserted the txid to map. In case of global command the inserting fiber will executed his
// entry.
bool global_cmd = (entry.payload.value().size() == 1);
if (!global_cmd) {
it->second.entries_vec.push_back(std::move(entry));
}
auto& tx_sync = it->second;
// Note: we must release the mutex befor calling wait on barrier
multi_shard_exe_->map_mu.unlock();
VLOG(2) << "txid: " << entry.txid << " unique_shard_cnt_: " << entry.shard_cnt
<< " was_insert: " << was_insert;
// step 1
it->second.barrier.wait();
tx_sync.barrier.wait();
// step 2
executor->Execute(entry);
if (was_insert) {
if (global_cmd) {
executor->Execute(entry);
} else {
executor->Execute(tx_sync.entries_vec);
}
}
// step 3
it->second.barrier.wait();
tx_sync.barrier.wait();
// Note: erase from map can be done only after all fibers returned from wait.
// The last fiber which will decrease the counter to 0 will be the one to erase the data from map
auto val = it->second.counter.fetch_sub(1, std::memory_order_relaxed);
VLOG(2) << "txid: " << entry.txid << " unique_shard_cnt_: " << entry.shard_cnt
<< " counter: " << val;
auto val = tx_sync.counter.fetch_sub(1, std::memory_order_relaxed);
VLOG(2) << "txid: " << txid << " counter: " << val;
if (val == 1) {
std::lock_guard lg{multi_shard_exe_->map_mu};
multi_shard_exe_->tx_sync_execution.erase(entry.txid);
multi_shard_exe_->tx_sync_execution.erase(txid);
}
}

View file

@ -58,6 +58,7 @@ class Replica {
std::atomic_uint32_t counter;
TxExecutionSync(uint32_t counter) : barrier(counter), counter(counter) {
}
std::vector<journal::ParsedEntry> entries_vec;
};
std::unordered_map<TxId, TxExecutionSync> tx_sync_execution;
@ -141,7 +142,7 @@ class Replica {
// Send command, update last_io_time, return error.
std::error_code SendCommand(std::string_view command, facade::ReqSerializer* serializer);
void ExecuteEntry(JournalExecutor* executor, journal::ParsedEntry& entry);
void ExecuteEntry(JournalExecutor* executor, journal::ParsedEntry&& entry);
public: /* Utility */
struct Info {