1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-15 17:51:06 +00:00

fix: increase cluster migration default timeout (#4293)

This commit is contained in:
Borys 2024-12-11 16:39:41 +02:00 committed by GitHub
parent 76f79f0e0b
commit f892d9b7fb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 16 additions and 10 deletions

View file

@ -180,7 +180,7 @@ bool IncomingSlotMigration::Join(long attempt) {
const absl::Duration passed = now - start; const absl::Duration passed = now - start;
VLOG(1) << "Checking whether to continue with join " << passed << " vs " << timeout; VLOG(1) << "Checking whether to continue with join " << passed << " vs " << timeout;
if (passed >= timeout) { if (passed >= timeout) {
LOG(WARNING) << "Can't join migration in time"; LOG(WARNING) << "Can't join migration in time for " << source_id_;
ReportError(GenericError("Can't join migration in time")); ReportError(GenericError("Can't join migration in time"));
return false; return false;
} }

View file

@ -20,7 +20,7 @@
#include "server/server_family.h" #include "server/server_family.h"
#include "util/fibers/synchronization.h" #include "util/fibers/synchronization.h"
ABSL_FLAG(int, slot_migration_connection_timeout_ms, 2000, "Timeout for network operations"); ABSL_FLAG(int, slot_migration_connection_timeout_ms, 5000, "Timeout for network operations");
using namespace std; using namespace std;
using namespace facade; using namespace facade;
@ -288,10 +288,12 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
if (cntx_.GetError()) { if (cntx_.GetError()) {
return true; return true;
} }
VLOG(1) << "Reconnecting to source"; VLOG(1) << "Reconnecting " << cf_->MyID() << " : " << migration_info_.node_info.id
<< " attempt " << attempt;
auto timeout = absl::GetFlag(FLAGS_slot_migration_connection_timeout_ms) * 1ms; auto timeout = absl::GetFlag(FLAGS_slot_migration_connection_timeout_ms) * 1ms;
if (auto ec = ConnectAndAuth(timeout, &cntx_); ec) { if (auto ec = ConnectAndAuth(timeout, &cntx_); ec) {
LOG(WARNING) << "Couldn't connect to source."; LOG(WARNING) << "Couldn't connect " << cf_->MyID() << " : " << migration_info_.node_info.id
<< " attempt " << attempt;
return false; return false;
} }
} }
@ -306,7 +308,8 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
nullptr, ClientPause::WRITE, is_pause_in_progress); nullptr, ClientPause::WRITE, is_pause_in_progress);
if (!pause_fb_opt) { if (!pause_fb_opt) {
LOG(WARNING) << "Cluster migration finalization time out"; LOG(WARNING) << "Migration finalization time out " << cf_->MyID() << " : "
<< migration_info_.node_info.id << " attempt " << attempt;
} }
absl::Cleanup cleanup([&is_block_active, &pause_fb_opt]() { absl::Cleanup cleanup([&is_block_active, &pause_fb_opt]() {
@ -335,7 +338,8 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
const absl::Time now = absl::Now(); const absl::Time now = absl::Now();
const absl::Duration passed = now - start; const absl::Duration passed = now - start;
if (passed >= timeout) { if (passed >= timeout) {
LOG(WARNING) << "Timeout fot ACK " << attempt; LOG(WARNING) << "Timeout fot ACK " << cf_->MyID() << " : " << migration_info_.node_info.id
<< " attempt " << attempt;
return false; return false;
} }
@ -345,15 +349,17 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
} }
if (!CheckRespFirstTypes({RespExpr::INT64})) { if (!CheckRespFirstTypes({RespExpr::INT64})) {
LOG(WARNING) << "Incorrect response type: " LOG(WARNING) << "Incorrect response type for " << cf_->MyID() << " : "
<< facade::ToSV(LastResponseArgs().front().GetBuf()); << migration_info_.node_info.id << " attempt " << attempt
<< " msg: " << facade::ToSV(LastResponseArgs().front().GetBuf());
return false; return false;
} }
if (const auto res = get<int64_t>(LastResponseArgs().front().u); res == attempt) { if (const auto res = get<int64_t>(LastResponseArgs().front().u); res == attempt) {
break; break;
} else { } else {
LOG(WARNING) << "Incorrect attempt payload, sent " << attempt << " received " << res; LOG(WARNING) << "Incorrect attempt payload " << cf_->MyID() << " : "
<< migration_info_.node_info.id << ", sent " << attempt << " received " << res;
} }
} }

View file

@ -1614,7 +1614,7 @@ async def test_cluster_fuzzymigration(
res = False res = False
return res return res
@assert_eventually(times=500) @assert_eventually(times=600)
async def test_all_finished(): async def test_all_finished():
assert await all_finished() assert await all_finished()