mirror of
https://github.com/dragonflydb/dragonfly.git
synced 2024-12-15 17:51:06 +00:00
fix: increase cluster migration default timeout (#4293)
This commit is contained in:
parent
76f79f0e0b
commit
f892d9b7fb
3 changed files with 16 additions and 10 deletions
|
@ -180,7 +180,7 @@ bool IncomingSlotMigration::Join(long attempt) {
|
||||||
const absl::Duration passed = now - start;
|
const absl::Duration passed = now - start;
|
||||||
VLOG(1) << "Checking whether to continue with join " << passed << " vs " << timeout;
|
VLOG(1) << "Checking whether to continue with join " << passed << " vs " << timeout;
|
||||||
if (passed >= timeout) {
|
if (passed >= timeout) {
|
||||||
LOG(WARNING) << "Can't join migration in time";
|
LOG(WARNING) << "Can't join migration in time for " << source_id_;
|
||||||
ReportError(GenericError("Can't join migration in time"));
|
ReportError(GenericError("Can't join migration in time"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include "server/server_family.h"
|
#include "server/server_family.h"
|
||||||
#include "util/fibers/synchronization.h"
|
#include "util/fibers/synchronization.h"
|
||||||
|
|
||||||
ABSL_FLAG(int, slot_migration_connection_timeout_ms, 2000, "Timeout for network operations");
|
ABSL_FLAG(int, slot_migration_connection_timeout_ms, 5000, "Timeout for network operations");
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace facade;
|
using namespace facade;
|
||||||
|
@ -288,10 +288,12 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
|
||||||
if (cntx_.GetError()) {
|
if (cntx_.GetError()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
VLOG(1) << "Reconnecting to source";
|
VLOG(1) << "Reconnecting " << cf_->MyID() << " : " << migration_info_.node_info.id
|
||||||
|
<< " attempt " << attempt;
|
||||||
auto timeout = absl::GetFlag(FLAGS_slot_migration_connection_timeout_ms) * 1ms;
|
auto timeout = absl::GetFlag(FLAGS_slot_migration_connection_timeout_ms) * 1ms;
|
||||||
if (auto ec = ConnectAndAuth(timeout, &cntx_); ec) {
|
if (auto ec = ConnectAndAuth(timeout, &cntx_); ec) {
|
||||||
LOG(WARNING) << "Couldn't connect to source.";
|
LOG(WARNING) << "Couldn't connect " << cf_->MyID() << " : " << migration_info_.node_info.id
|
||||||
|
<< " attempt " << attempt;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -306,7 +308,8 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
|
||||||
nullptr, ClientPause::WRITE, is_pause_in_progress);
|
nullptr, ClientPause::WRITE, is_pause_in_progress);
|
||||||
|
|
||||||
if (!pause_fb_opt) {
|
if (!pause_fb_opt) {
|
||||||
LOG(WARNING) << "Cluster migration finalization time out";
|
LOG(WARNING) << "Migration finalization time out " << cf_->MyID() << " : "
|
||||||
|
<< migration_info_.node_info.id << " attempt " << attempt;
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Cleanup cleanup([&is_block_active, &pause_fb_opt]() {
|
absl::Cleanup cleanup([&is_block_active, &pause_fb_opt]() {
|
||||||
|
@ -335,7 +338,8 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
|
||||||
const absl::Time now = absl::Now();
|
const absl::Time now = absl::Now();
|
||||||
const absl::Duration passed = now - start;
|
const absl::Duration passed = now - start;
|
||||||
if (passed >= timeout) {
|
if (passed >= timeout) {
|
||||||
LOG(WARNING) << "Timeout fot ACK " << attempt;
|
LOG(WARNING) << "Timeout fot ACK " << cf_->MyID() << " : " << migration_info_.node_info.id
|
||||||
|
<< " attempt " << attempt;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,15 +349,17 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!CheckRespFirstTypes({RespExpr::INT64})) {
|
if (!CheckRespFirstTypes({RespExpr::INT64})) {
|
||||||
LOG(WARNING) << "Incorrect response type: "
|
LOG(WARNING) << "Incorrect response type for " << cf_->MyID() << " : "
|
||||||
<< facade::ToSV(LastResponseArgs().front().GetBuf());
|
<< migration_info_.node_info.id << " attempt " << attempt
|
||||||
|
<< " msg: " << facade::ToSV(LastResponseArgs().front().GetBuf());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto res = get<int64_t>(LastResponseArgs().front().u); res == attempt) {
|
if (const auto res = get<int64_t>(LastResponseArgs().front().u); res == attempt) {
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
LOG(WARNING) << "Incorrect attempt payload, sent " << attempt << " received " << res;
|
LOG(WARNING) << "Incorrect attempt payload " << cf_->MyID() << " : "
|
||||||
|
<< migration_info_.node_info.id << ", sent " << attempt << " received " << res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1614,7 +1614,7 @@ async def test_cluster_fuzzymigration(
|
||||||
res = False
|
res = False
|
||||||
return res
|
return res
|
||||||
|
|
||||||
@assert_eventually(times=500)
|
@assert_eventually(times=600)
|
||||||
async def test_all_finished():
|
async def test_all_finished():
|
||||||
assert await all_finished()
|
assert await all_finished()
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue