1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-14 11:58:02 +00:00

fix: increase cluster migration default timeout (#4293)

This commit is contained in:
Borys 2024-12-11 16:39:41 +02:00 committed by GitHub
parent 76f79f0e0b
commit f892d9b7fb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 16 additions and 10 deletions

View file

@ -180,7 +180,7 @@ bool IncomingSlotMigration::Join(long attempt) {
const absl::Duration passed = now - start;
VLOG(1) << "Checking whether to continue with join " << passed << " vs " << timeout;
if (passed >= timeout) {
LOG(WARNING) << "Can't join migration in time";
LOG(WARNING) << "Can't join migration in time for " << source_id_;
ReportError(GenericError("Can't join migration in time"));
return false;
}

View file

@ -20,7 +20,7 @@
#include "server/server_family.h"
#include "util/fibers/synchronization.h"
ABSL_FLAG(int, slot_migration_connection_timeout_ms, 2000, "Timeout for network operations");
ABSL_FLAG(int, slot_migration_connection_timeout_ms, 5000, "Timeout for network operations");
using namespace std;
using namespace facade;
@ -288,10 +288,12 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
if (cntx_.GetError()) {
return true;
}
VLOG(1) << "Reconnecting to source";
VLOG(1) << "Reconnecting " << cf_->MyID() << " : " << migration_info_.node_info.id
<< " attempt " << attempt;
auto timeout = absl::GetFlag(FLAGS_slot_migration_connection_timeout_ms) * 1ms;
if (auto ec = ConnectAndAuth(timeout, &cntx_); ec) {
LOG(WARNING) << "Couldn't connect to source.";
LOG(WARNING) << "Couldn't connect " << cf_->MyID() << " : " << migration_info_.node_info.id
<< " attempt " << attempt;
return false;
}
}
@ -306,7 +308,8 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
nullptr, ClientPause::WRITE, is_pause_in_progress);
if (!pause_fb_opt) {
LOG(WARNING) << "Cluster migration finalization time out";
LOG(WARNING) << "Migration finalization time out " << cf_->MyID() << " : "
<< migration_info_.node_info.id << " attempt " << attempt;
}
absl::Cleanup cleanup([&is_block_active, &pause_fb_opt]() {
@ -335,7 +338,8 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
const absl::Time now = absl::Now();
const absl::Duration passed = now - start;
if (passed >= timeout) {
LOG(WARNING) << "Timeout fot ACK " << attempt;
LOG(WARNING) << "Timeout fot ACK " << cf_->MyID() << " : " << migration_info_.node_info.id
<< " attempt " << attempt;
return false;
}
@ -345,15 +349,17 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
}
if (!CheckRespFirstTypes({RespExpr::INT64})) {
LOG(WARNING) << "Incorrect response type: "
<< facade::ToSV(LastResponseArgs().front().GetBuf());
LOG(WARNING) << "Incorrect response type for " << cf_->MyID() << " : "
<< migration_info_.node_info.id << " attempt " << attempt
<< " msg: " << facade::ToSV(LastResponseArgs().front().GetBuf());
return false;
}
if (const auto res = get<int64_t>(LastResponseArgs().front().u); res == attempt) {
break;
} else {
LOG(WARNING) << "Incorrect attempt payload, sent " << attempt << " received " << res;
LOG(WARNING) << "Incorrect attempt payload " << cf_->MyID() << " : "
<< migration_info_.node_info.id << ", sent " << attempt << " received " << res;
}
}

View file

@ -1614,7 +1614,7 @@ async def test_cluster_fuzzymigration(
res = False
return res
@assert_eventually(times=500)
@assert_eventually(times=600)
async def test_all_finished():
assert await all_finished()