From 50a7f2bcb11ae27963402736313ffe0e91bd7f9d Mon Sep 17 00:00:00 2001 From: Shahar Mike Date: Tue, 8 Oct 2024 14:47:31 +0300 Subject: [PATCH] fix: Do not kill Dragonfly on failed `DFLY LOAD` (#3892) Today, some of the failures to load an RDB file passed via `--dbfilename` cause Dragonfly to terminate with an error code. This is ok and works as expected. The problem is that the same code path is used for `DFLY LOAD`, which means that if there's an error loading the file (such as corrupted file), Dragonfly will exit instead of returning an error code to the client. This change fixes that, by only exiting in the code path which loads on init. Note to reviewer: apparently we can't call `Future::Get()` more than once, as the first call resets the state of the future and drops the previously saved value, so we use a Fiber here instead. --- src/server/server_family.cc | 18 +++++++++++++----- src/server/server_family.h | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/server/server_family.cc b/src/server/server_family.cc index ea5d407d6..b5a152b16 100644 --- a/src/server/server_family.cc +++ b/src/server/server_family.cc @@ -916,7 +916,15 @@ void ServerFamily::LoadFromSnapshot() { if (load_path_result) { const std::string load_path = *load_path_result; if (!load_path.empty()) { - load_result_ = Load(load_path, LoadExistingKeys::kFail); + auto future = Load(load_path, LoadExistingKeys::kFail); + load_fiber_ = service_.proactor_pool().GetNextProactor()->LaunchFiber([future]() mutable { + // Wait for load to finish in a dedicated fiber. + // Failure to load on start causes Dragonfly to exit with an error code. + if (!future.has_value() || future->Get()) { + // Error was already printed to log at this point. + exit(1); + } + }); } } else { if (std::error_code(load_path_result.error()) == std::errc::no_such_file_or_directory) { @@ -938,9 +946,7 @@ void ServerFamily::JoinSnapshotSchedule() { void ServerFamily::Shutdown() { VLOG(1) << "ServerFamily::Shutdown"; - if (load_result_) { - std::exchange(load_result_, std::nullopt)->Get(); - } + load_fiber_.JoinIfNeeded(); JoinSnapshotSchedule(); @@ -1121,7 +1127,9 @@ std::optional> ServerFamily::Load(string_view load_pat if (aggregated_result->first_error) { LOG(ERROR) << "Rdb load failed. " << (*aggregated_result->first_error).message(); - exit(1); + service_.SwitchState(GlobalState::LOADING, GlobalState::ACTIVE); + future.Resolve(*aggregated_result->first_error); + return; } RdbLoader::PerformPostLoad(&service_); diff --git a/src/server/server_family.h b/src/server/server_family.h index b89fda512..82c613d3b 100644 --- a/src/server/server_family.h +++ b/src/server/server_family.h @@ -331,7 +331,7 @@ class ServerFamily { bool DoAuth(ConnectionContext* cntx, std::string_view username, std::string_view password) const; util::fb2::Fiber snapshot_schedule_fb_; - std::optional> load_result_; + util::fb2::Fiber load_fiber_; Service& service_;