1
0
Fork 0
mirror of https://github.com/dragonflydb/dragonfly.git synced 2024-12-14 11:58:02 +00:00

fix: fix move error during migration finalization (#3253)

* fix: fix Move error during migration finalization
This commit is contained in:
Borys 2024-07-02 14:23:54 +03:00 committed by GitHub
parent 506ecbc3cd
commit 84814a7358
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 185 additions and 111 deletions

View file

@ -90,6 +90,7 @@ shared_ptr<ClusterConfig> ClusterConfig::CreateFromConfig(string_view my_id,
shared_ptr<ClusterConfig> result(new ClusterConfig()); shared_ptr<ClusterConfig> result(new ClusterConfig());
result->my_id_ = my_id;
result->config_ = config; result->config_ = config;
for (const auto& shard : result->config_) { for (const auto& shard : result->config_) {
@ -101,10 +102,10 @@ shared_ptr<ClusterConfig> ClusterConfig::CreateFromConfig(string_view my_id,
result->my_outgoing_migrations_ = shard.migrations; result->my_outgoing_migrations_ = shard.migrations;
} else { } else {
for (const auto& m : shard.migrations) { for (const auto& m : shard.migrations) {
if (my_id == m.node_id) { if (my_id == m.node_info.id) {
auto incoming_migration = m; auto incoming_migration = m;
// for incoming migration we need the source node // for incoming migration we need the source node
incoming_migration.node_id = shard.master.id; incoming_migration.node_info.id = shard.master.id;
result->my_incoming_migrations_.push_back(std::move(incoming_migration)); result->my_incoming_migrations_.push_back(std::move(incoming_migration));
} }
} }
@ -132,7 +133,7 @@ optional<SlotRanges> GetClusterSlotRanges(const JsonType& slots) {
return nullopt; return nullopt;
} }
SlotRanges ranges; std::vector<SlotRange> ranges;
for (const auto& range : slots.array_range()) { for (const auto& range : slots.array_range()) {
if (!range.is_object()) { if (!range.is_object()) {
@ -149,7 +150,7 @@ optional<SlotRanges> GetClusterSlotRanges(const JsonType& slots) {
ranges.push_back({.start = start.value(), .end = end.value()}); ranges.push_back({.start = start.value(), .end = end.value()});
} }
return ranges; return SlotRanges(ranges);
} }
optional<ClusterNodeInfo> ParseClusterNode(const JsonType& json) { optional<ClusterNodeInfo> ParseClusterNode(const JsonType& json) {
@ -211,10 +212,10 @@ optional<std::vector<MigrationInfo>> ParseMigrations(const JsonType& json) {
return nullopt; return nullopt;
} }
res.emplace_back(MigrationInfo{.slot_ranges = std::move(*slots), res.emplace_back(MigrationInfo{
.node_id = node_id.as_string(), .slot_ranges = std::move(*slots),
.ip = ip.as_string(), .node_info =
.port = *port}); ClusterNodeInfo{.id = node_id.as_string(), .ip = ip.as_string(), .port = *port}});
} }
return res; return res;
} }
@ -316,10 +317,17 @@ ClusterNodeInfo ClusterConfig::GetMasterNodeForSlot(SlotId id) const {
CHECK_LE(id, cluster::kMaxSlotNum) << "Requesting a non-existing slot id " << id; CHECK_LE(id, cluster::kMaxSlotNum) << "Requesting a non-existing slot id " << id;
for (const auto& shard : config_) { for (const auto& shard : config_) {
for (const auto& range : shard.slot_ranges) { if (shard.slot_ranges.Contains(id)) {
if (id >= range.start && id <= range.end) { if (shard.master.id == my_id_) {
return shard.master; // The only reason why this function call and shard.master == my_id_ is the slot was
// migrated
for (const auto& m : shard.migrations) {
if (m.slot_ranges.Contains(id)) {
return m.node_info;
}
}
} }
return shard.master;
} }
} }

View file

@ -9,7 +9,6 @@
#include <vector> #include <vector>
#include "src/server/cluster/slot_set.h" #include "src/server/cluster/slot_set.h"
#include "src/server/common.h"
namespace dfly::cluster { namespace dfly::cluster {
@ -59,6 +58,7 @@ class ClusterConfig {
ClusterConfig() = default; ClusterConfig() = default;
std::string my_id_;
ClusterShardInfos config_; ClusterShardInfos config_;
SlotSet my_slots_; SlotSet my_slots_;

View file

@ -96,7 +96,7 @@ TEST_F(ClusterConfigTest, ConfigSetInvalidEmpty) {
TEST_F(ClusterConfigTest, ConfigSetInvalidMissingSlots) { TEST_F(ClusterConfigTest, ConfigSetInvalidMissingSlots) {
EXPECT_EQ(ClusterConfig::CreateFromConfig( EXPECT_EQ(ClusterConfig::CreateFromConfig(
kMyId, {{.slot_ranges = {{.start = 0, .end = 16000}}, kMyId, {{.slot_ranges = SlotRanges({{.start = 0, .end = 16000}}),
.master = {.id = "other", .ip = "192.168.0.100", .port = 7000}, .master = {.id = "other", .ip = "192.168.0.100", .port = 7000},
.replicas = {}, .replicas = {},
.migrations = {}}}), .migrations = {}}}),
@ -105,11 +105,11 @@ TEST_F(ClusterConfigTest, ConfigSetInvalidMissingSlots) {
TEST_F(ClusterConfigTest, ConfigSetInvalidDoubleBookedSlot) { TEST_F(ClusterConfigTest, ConfigSetInvalidDoubleBookedSlot) {
EXPECT_EQ(ClusterConfig::CreateFromConfig( EXPECT_EQ(ClusterConfig::CreateFromConfig(
kMyId, {{.slot_ranges = {{.start = 0, .end = 0x3FFF}}, kMyId, {{.slot_ranges = SlotRanges({{.start = 0, .end = 0x3FFF}}),
.master = {.id = "other", .ip = "192.168.0.100", .port = 7000}, .master = {.id = "other", .ip = "192.168.0.100", .port = 7000},
.replicas = {}, .replicas = {},
.migrations = {}}, .migrations = {}},
{.slot_ranges = {{.start = 0, .end = 0}}, {.slot_ranges = SlotRanges({{.start = 0, .end = 0}}),
.master = {.id = "other2", .ip = "192.168.0.101", .port = 7001}, .master = {.id = "other2", .ip = "192.168.0.101", .port = 7001},
.replicas = {}, .replicas = {},
.migrations = {}}}), .migrations = {}}}),
@ -118,7 +118,7 @@ TEST_F(ClusterConfigTest, ConfigSetInvalidDoubleBookedSlot) {
TEST_F(ClusterConfigTest, ConfigSetInvalidSlotId) { TEST_F(ClusterConfigTest, ConfigSetInvalidSlotId) {
EXPECT_EQ(ClusterConfig::CreateFromConfig( EXPECT_EQ(ClusterConfig::CreateFromConfig(
kMyId, {{.slot_ranges = {{.start = 0, .end = 0x3FFF + 1}}, kMyId, {{.slot_ranges = SlotRanges({{.start = 0, .end = 0x3FFF + 1}}),
.master = {.id = "other", .ip = "192.168.0.100", .port = 7000}, .master = {.id = "other", .ip = "192.168.0.100", .port = 7000},
.replicas = {}, .replicas = {},
.migrations = {}}}), .migrations = {}}}),
@ -127,7 +127,7 @@ TEST_F(ClusterConfigTest, ConfigSetInvalidSlotId) {
TEST_F(ClusterConfigTest, ConfigSetOk) { TEST_F(ClusterConfigTest, ConfigSetOk) {
auto config = ClusterConfig::CreateFromConfig( auto config = ClusterConfig::CreateFromConfig(
kMyId, {{.slot_ranges = {{.start = 0, .end = 0x3FFF}}, kMyId, {{.slot_ranges = SlotRanges({{.start = 0, .end = 0x3FFF}}),
.master = {.id = "other", .ip = "192.168.0.100", .port = 7000}, .master = {.id = "other", .ip = "192.168.0.100", .port = 7000},
.replicas = {}, .replicas = {},
.migrations = {}}}); .migrations = {}}});
@ -139,7 +139,7 @@ TEST_F(ClusterConfigTest, ConfigSetOk) {
TEST_F(ClusterConfigTest, ConfigSetOkWithReplica) { TEST_F(ClusterConfigTest, ConfigSetOkWithReplica) {
auto config = ClusterConfig::CreateFromConfig( auto config = ClusterConfig::CreateFromConfig(
kMyId, {{.slot_ranges = {{.start = 0, .end = 0x3FFF}}, kMyId, {{.slot_ranges = SlotRanges({{.start = 0, .end = 0x3FFF}}),
.master = {.id = "other-master", .ip = "192.168.0.100", .port = 7000}, .master = {.id = "other-master", .ip = "192.168.0.100", .port = 7000},
.replicas = {{.id = "other-replica", .ip = "192.168.0.101", .port = 7001}}, .replicas = {{.id = "other-replica", .ip = "192.168.0.101", .port = 7001}},
.migrations = {}}}); .migrations = {}}});
@ -150,21 +150,21 @@ TEST_F(ClusterConfigTest, ConfigSetOkWithReplica) {
TEST_F(ClusterConfigTest, ConfigSetMultipleInstances) { TEST_F(ClusterConfigTest, ConfigSetMultipleInstances) {
auto config = ClusterConfig::CreateFromConfig( auto config = ClusterConfig::CreateFromConfig(
kMyId, {{.slot_ranges = {{.start = 0, .end = 5'000}}, kMyId, {{.slot_ranges = SlotRanges({{.start = 0, .end = 5'000}}),
.master = {.id = "other-master", .ip = "192.168.0.100", .port = 7000}, .master = {.id = "other-master", .ip = "192.168.0.100", .port = 7000},
.replicas = {{.id = "other-replica", .ip = "192.168.0.101", .port = 7001}}, .replicas = {{.id = "other-replica", .ip = "192.168.0.101", .port = 7001}},
.migrations = {}}, .migrations = {}},
{.slot_ranges = {{.start = 5'001, .end = 10'000}}, {.slot_ranges = SlotRanges({{.start = 5'001, .end = 10'000}}),
.master = {.id = kMyId, .ip = "192.168.0.102", .port = 7002}, .master = {.id = kMyId, .ip = "192.168.0.102", .port = 7002},
.replicas = {{.id = "other-replica2", .ip = "192.168.0.103", .port = 7003}}, .replicas = {{.id = "other-replica2", .ip = "192.168.0.103", .port = 7003}},
.migrations = {}}, .migrations = {}},
{.slot_ranges = {{.start = 10'001, .end = 0x3FFF}}, {.slot_ranges = SlotRanges({{.start = 10'001, .end = 0x3FFF}}),
.master = {.id = "other-master3", .ip = "192.168.0.104", .port = 7004}, .master = {.id = "other-master3", .ip = "192.168.0.104", .port = 7004},
.replicas = {{.id = "other-replica3", .ip = "192.168.0.105", .port = 7005}}, .replicas = {{.id = "other-replica3", .ip = "192.168.0.105", .port = 7005}},
.migrations = {}}}); .migrations = {}}});
EXPECT_NE(config, nullptr); EXPECT_NE(config, nullptr);
SlotSet owned_slots = config->GetOwnedSlots(); SlotSet owned_slots = config->GetOwnedSlots();
EXPECT_EQ(owned_slots.ToSlotRanges().size(), 1); EXPECT_EQ(owned_slots.ToSlotRanges().Size(), 1);
EXPECT_EQ(owned_slots.Count(), 5'000); EXPECT_EQ(owned_slots.Count(), 5'000);
{ {
@ -481,8 +481,8 @@ TEST_F(ClusterConfigTest, ConfigSetMigrations) {
auto config1 = ClusterConfig::CreateFromConfig("id0", config_str); auto config1 = ClusterConfig::CreateFromConfig("id0", config_str);
EXPECT_EQ( EXPECT_EQ(
config1->GetNewOutgoingMigrations(nullptr), config1->GetNewOutgoingMigrations(nullptr),
(std::vector<MigrationInfo>{ (std::vector<MigrationInfo>{{.slot_ranges = SlotRanges({{7000, 8000}}),
{.slot_ranges = {{7000, 8000}}, .node_id = "id1", .ip = "127.0.0.1", .port = 9001}})); .node_info = {.id = "id1", .ip = "127.0.0.1", .port = 9001}}}));
EXPECT_TRUE(config1->GetFinishedOutgoingMigrations(nullptr).empty()); EXPECT_TRUE(config1->GetFinishedOutgoingMigrations(nullptr).empty());
EXPECT_TRUE(config1->GetNewIncomingMigrations(nullptr).empty()); EXPECT_TRUE(config1->GetNewIncomingMigrations(nullptr).empty());
@ -491,8 +491,8 @@ TEST_F(ClusterConfigTest, ConfigSetMigrations) {
auto config2 = ClusterConfig::CreateFromConfig("id1", config_str); auto config2 = ClusterConfig::CreateFromConfig("id1", config_str);
EXPECT_EQ( EXPECT_EQ(
config2->GetNewIncomingMigrations(nullptr), config2->GetNewIncomingMigrations(nullptr),
(std::vector<MigrationInfo>{ (std::vector<MigrationInfo>{{.slot_ranges = SlotRanges({{7000, 8000}}),
{.slot_ranges = {{7000, 8000}}, .node_id = "id0", .ip = "127.0.0.1", .port = 9001}})); .node_info = {.id = "id0", .ip = "127.0.0.1", .port = 9001}}}));
EXPECT_TRUE(config2->GetFinishedOutgoingMigrations(nullptr).empty()); EXPECT_TRUE(config2->GetFinishedOutgoingMigrations(nullptr).empty());
EXPECT_TRUE(config2->GetNewOutgoingMigrations(nullptr).empty()); EXPECT_TRUE(config2->GetNewOutgoingMigrations(nullptr).empty());
@ -523,16 +523,16 @@ TEST_F(ClusterConfigTest, ConfigSetMigrations) {
EXPECT_EQ( EXPECT_EQ(
config4->GetFinishedOutgoingMigrations(config1), config4->GetFinishedOutgoingMigrations(config1),
(std::vector<MigrationInfo>{ (std::vector<MigrationInfo>{{.slot_ranges = SlotRanges({{7000, 8000}}),
{.slot_ranges = {{7000, 8000}}, .node_id = "id1", .ip = "127.0.0.1", .port = 9001}})); .node_info = {.id = "id1", .ip = "127.0.0.1", .port = 9001}}}));
EXPECT_TRUE(config4->GetNewIncomingMigrations(config1).empty()); EXPECT_TRUE(config4->GetNewIncomingMigrations(config1).empty());
EXPECT_TRUE(config4->GetFinishedIncomingMigrations(config1).empty()); EXPECT_TRUE(config4->GetFinishedIncomingMigrations(config1).empty());
EXPECT_TRUE(config4->GetNewOutgoingMigrations(config1).empty()); EXPECT_TRUE(config4->GetNewOutgoingMigrations(config1).empty());
EXPECT_EQ( EXPECT_EQ(
config5->GetFinishedIncomingMigrations(config2), config5->GetFinishedIncomingMigrations(config2),
(std::vector<MigrationInfo>{ (std::vector<MigrationInfo>{{.slot_ranges = SlotRanges({{7000, 8000}}),
{.slot_ranges = {{7000, 8000}}, .node_id = "id0", .ip = "127.0.0.1", .port = 9001}})); .node_info = {.id = "id0", .ip = "127.0.0.1", .port = 9001}}}));
EXPECT_TRUE(config5->GetNewIncomingMigrations(config2).empty()); EXPECT_TRUE(config5->GetNewIncomingMigrations(config2).empty());
EXPECT_TRUE(config5->GetFinishedOutgoingMigrations(config2).empty()); EXPECT_TRUE(config5->GetFinishedOutgoingMigrations(config2).empty());
EXPECT_TRUE(config5->GetNewOutgoingMigrations(config2).empty()); EXPECT_TRUE(config5->GetNewOutgoingMigrations(config2).empty());
@ -589,7 +589,7 @@ TEST_F(ClusterConfigTest, SlotSetAPI) {
ss.Set(5010, true); ss.Set(5010, true);
EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5010, 5010}})); EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5010, 5010}}));
ss.Set({SlotRange{5000, 5100}}, true); ss.Set(SlotRanges({{5000, 5100}}), true);
EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5000, 5100}})); EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5000, 5100}}));
ss.Set(5050, false); ss.Set(5050, false);
@ -598,7 +598,7 @@ TEST_F(ClusterConfigTest, SlotSetAPI) {
ss.Set(5500, false); ss.Set(5500, false);
EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5000, 5049}, {5051, 5100}})); EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5000, 5049}, {5051, 5100}}));
ss.Set({SlotRange{5090, 5100}}, false); ss.Set(SlotRanges({{5090, 5100}}), false);
EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5000, 5049}, {5051, 5089}})); EXPECT_EQ(ss.ToSlotRanges(), SlotRanges({{0, 2000}, {5000, 5049}, {5051, 5089}}));
SlotSet ss1(SlotRanges({{1001, 2000}})); SlotSet ss1(SlotRanges({{1001, 2000}}));

View file

@ -3,9 +3,13 @@ extern "C" {
#include "redis/crc16.h" #include "redis/crc16.h"
} }
#include <absl/strings/str_cat.h>
#include <absl/strings/str_join.h>
#include "base/flags.h" #include "base/flags.h"
#include "base/logging.h" #include "base/logging.h"
#include "cluster_defs.h" #include "cluster_defs.h"
#include "slot_set.h"
#include "src/server/common.h" #include "src/server/common.h"
using namespace std; using namespace std;
@ -15,6 +19,32 @@ ABSL_FLAG(string, cluster_mode, "",
"'emulated', 'yes' or ''"); "'emulated', 'yes' or ''");
namespace dfly::cluster { namespace dfly::cluster {
// Renders this range as "[start, end]" using the decimal values of both bounds.
std::string SlotRange::ToString() const {
  std::string text = absl::StrCat("[", start);
  absl::StrAppend(&text, ", ", end, "]");
  return text;
}
// Takes ownership of `ranges` and keeps them sorted according to
// SlotRange::operator< so the stored order does not depend on input order.
SlotRanges::SlotRanges(std::vector<SlotRange> ranges) : ranges_(std::move(ranges)) {
  const auto by_range_order = [](const SlotRange& lhs, const SlotRange& rhs) { return lhs < rhs; };
  std::sort(ranges_.begin(), ranges_.end(), by_range_order);
}
void SlotRanges::Merge(const SlotRanges& sr) {
// TODO rewrite it
SlotSet slots(*this);
slots.Set(sr, true);
ranges_ = std::move(slots.ToSlotRanges().ranges_);
}
std::string SlotRanges::ToString() const {
return absl::StrJoin(ranges_, ", ", [](std::string* out, SlotRange range) {
absl::StrAppend(out, range.ToString());
});
}
// Renders the migration as "<node id>,<ip>:<port> (<slot ranges>)".
std::string MigrationInfo::ToString() const {
  std::string text = absl::StrCat(node_info.id, ",", node_info.ip, ":", node_info.port);
  absl::StrAppend(&text, " (", slot_ranges.ToString(), ")");
  return text;
}
namespace { namespace {
enum class ClusterMode { enum class ClusterMode {
kUninitialized, kUninitialized,

View file

@ -4,10 +4,7 @@
#pragma once #pragma once
#include <absl/strings/str_cat.h> #include <cstdint>
#include <absl/strings/str_join.h>
#include <memory>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <vector> #include <vector>
@ -24,45 +21,85 @@ struct SlotRange {
SlotId start = 0; SlotId start = 0;
SlotId end = 0; SlotId end = 0;
bool operator==(const SlotRange& r) const { bool operator==(const SlotRange& r) const noexcept {
return start == r.start && end == r.end; return start == r.start && end == r.end;
} }
bool IsValid() {
bool operator<(const SlotRange& r) const noexcept {
return start < r.start || (start == r.start && end < r.end);
}
bool IsValid() const noexcept {
return start <= end && start <= kMaxSlotId && end <= kMaxSlotId; return start <= end && start <= kMaxSlotId && end <= kMaxSlotId;
} }
std::string ToString() const { bool Contains(SlotId id) const noexcept {
return absl::StrCat("[", start, ", ", end, "]"); return id >= start && id <= end;
} }
static std::string ToString(const std::vector<SlotRange>& ranges) { std::string ToString() const;
return absl::StrJoin(ranges, ", ", [](std::string* out, SlotRange range) {
absl::StrAppend(out, range.ToString());
});
}
}; };
using SlotRanges = std::vector<SlotRange>; class SlotRanges {
public:
SlotRanges() = default;
explicit SlotRanges(std::vector<SlotRange> ranges);
bool Contains(SlotId id) const noexcept {
for (const auto& sr : ranges_) {
if (sr.Contains(id))
return true;
}
return false;
}
size_t Size() const noexcept {
return ranges_.size();
}
bool Empty() const noexcept {
return ranges_.empty();
}
void Merge(const SlotRanges& sr);
bool operator==(const SlotRanges& r) const noexcept {
return ranges_ == r.ranges_;
}
std::string ToString() const;
auto begin() const noexcept {
return ranges_.cbegin();
}
auto end() const noexcept {
return ranges_.cend();
}
private:
std::vector<SlotRange> ranges_;
};
struct ClusterNodeInfo { struct ClusterNodeInfo {
std::string id; std::string id;
std::string ip; std::string ip;
uint16_t port = 0; uint16_t port = 0;
bool operator==(const ClusterNodeInfo& r) const noexcept {
return port == r.port && ip == r.ip && id == r.id;
}
}; };
struct MigrationInfo { struct MigrationInfo {
std::vector<SlotRange> slot_ranges; SlotRanges slot_ranges;
std::string node_id; ClusterNodeInfo node_info;
std::string ip;
uint16_t port = 0;
bool operator==(const MigrationInfo& r) const { bool operator==(const MigrationInfo& r) const noexcept {
return ip == r.ip && port == r.port && slot_ranges == r.slot_ranges && node_id == r.node_id; return node_info == r.node_info && slot_ranges == r.slot_ranges;
} }
std::string ToString() const { std::string ToString() const;
return absl::StrCat(node_id, ",", ip, ":", port, " (", SlotRange::ToString(slot_ranges), ")");
}
}; };
struct ClusterShardInfo { struct ClusterShardInfo {

View file

@ -79,7 +79,7 @@ ClusterConfig* ClusterFamily::cluster_config() {
} }
ClusterShardInfo ClusterFamily::GetEmulatedShardInfo(ConnectionContext* cntx) const { ClusterShardInfo ClusterFamily::GetEmulatedShardInfo(ConnectionContext* cntx) const {
ClusterShardInfo info{.slot_ranges = {{.start = 0, .end = kMaxSlotNum}}, ClusterShardInfo info{.slot_ranges = SlotRanges({{.start = 0, .end = kMaxSlotNum}}),
.master = {}, .master = {},
.replicas = {}, .replicas = {},
.migrations = {}}; .migrations = {}};
@ -160,7 +160,7 @@ void ClusterShardsImpl(const ClusterShardInfos& config, ConnectionContext* cntx)
rb->StartArray(kEntrySize); rb->StartArray(kEntrySize);
rb->SendBulkString("slots"); rb->SendBulkString("slots");
rb->StartArray(shard.slot_ranges.size() * 2); rb->StartArray(shard.slot_ranges.Size() * 2);
for (const auto& slot_range : shard.slot_ranges) { for (const auto& slot_range : shard.slot_ranges) {
rb->SendLong(slot_range.start); rb->SendLong(slot_range.start);
rb->SendLong(slot_range.end); rb->SendLong(slot_range.end);
@ -200,7 +200,7 @@ void ClusterSlotsImpl(const ClusterShardInfos& config, ConnectionContext* cntx)
unsigned int slot_ranges = 0; unsigned int slot_ranges = 0;
for (const auto& shard : config) { for (const auto& shard : config) {
slot_ranges += shard.slot_ranges.size(); slot_ranges += shard.slot_ranges.Size();
} }
rb->StartArray(slot_ranges); rb->StartArray(slot_ranges);
@ -237,7 +237,7 @@ void ClusterNodesImpl(const ClusterShardInfos& config, string_view my_id, Connec
string result; string result;
auto WriteNode = [&](const ClusterNodeInfo& node, string_view role, string_view master_id, auto WriteNode = [&](const ClusterNodeInfo& node, string_view role, string_view master_id,
const vector<SlotRange>& ranges) { const SlotRanges& ranges) {
absl::StrAppend(&result, node.id, " "); absl::StrAppend(&result, node.id, " ");
absl::StrAppend(&result, node.ip, ":", node.port, "@", node.port, " "); absl::StrAppend(&result, node.ip, ":", node.port, "@", node.port, " ");
@ -312,7 +312,7 @@ void ClusterInfoImpl(const ClusterShardInfos& config, ConnectionContext* cntx) {
known_nodes += 1; // For master known_nodes += 1; // For master
known_nodes += shard_config.replicas.size(); known_nodes += shard_config.replicas.size();
if (!shard_config.slot_ranges.empty()) { if (!shard_config.slot_ranges.Empty()) {
++cluster_size; ++cluster_size;
} }
} }
@ -436,7 +436,7 @@ void ClusterFamily::DflyClusterMyId(CmdArgList args, ConnectionContext* cntx) {
namespace { namespace {
void DeleteSlots(const SlotRanges& slots_ranges) { void DeleteSlots(const SlotRanges& slots_ranges) {
if (slots_ranges.empty()) { if (slots_ranges.Empty()) {
return; return;
} }
@ -451,13 +451,13 @@ void DeleteSlots(const SlotRanges& slots_ranges) {
} }
void WriteFlushSlotsToJournal(const SlotRanges& slot_ranges) { void WriteFlushSlotsToJournal(const SlotRanges& slot_ranges) {
if (slot_ranges.empty()) { if (slot_ranges.Empty()) {
return; return;
} }
// Build args // Build args
vector<string> args; vector<string> args;
args.reserve(slot_ranges.size() + 1); args.reserve(slot_ranges.Size() + 1);
args.push_back("FLUSHSLOTS"); args.push_back("FLUSHSLOTS");
for (SlotRange range : slot_ranges) { for (SlotRange range : slot_ranges) {
args.push_back(absl::StrCat(range.start)); args.push_back(absl::StrCat(range.start));
@ -517,14 +517,12 @@ void ClusterFamily::DflyClusterConfig(CmdArgList args, ConnectionContext* cntx)
// set_config_mu is unlocked and even if we apply the same changes 2 times it's not a problem // set_config_mu is unlocked and even if we apply the same changes 2 times it's not a problem
for (const auto& m : incoming_migrations_jobs_) { for (const auto& m : incoming_migrations_jobs_) {
if (m->GetState() == MigrationState::C_FINISHED) { if (m->GetState() == MigrationState::C_FINISHED) {
const auto& slots = m->GetSlots(); enable_slots.Merge(m->GetSlots());
enable_slots.insert(enable_slots.end(), slots.begin(), slots.end());
} }
} }
for (const auto& m : outgoing_migration_jobs_) { for (const auto& m : outgoing_migration_jobs_) {
if (m->GetState() == MigrationState::C_FINISHED) { if (m->GetState() == MigrationState::C_FINISHED) {
const auto& slots = m->GetSlots(); disable_slots.Merge(m->GetSlots());
disable_slots.insert(disable_slots.end(), slots.begin(), slots.end());
} }
} }
} }
@ -554,17 +552,16 @@ void ClusterFamily::DflyClusterConfig(CmdArgList args, ConnectionContext* cntx)
DCHECK(tl_cluster_config != nullptr); DCHECK(tl_cluster_config != nullptr);
if (!tracker.Wait(absl::Seconds(1))) { if (!tracker.Wait(absl::Seconds(1))) {
LOG(WARNING) << "Cluster config change timed out"; LOG(WARNING) << "Cluster config change timed for: " << MyID();
} }
SlotSet after = tl_cluster_config->GetOwnedSlots(); SlotSet after = tl_cluster_config->GetOwnedSlots();
if (ServerState::tlocal()->is_master) { if (ServerState::tlocal()->is_master) {
auto deleted_slots = (before.GetRemovedSlots(after)).ToSlotRanges(); auto deleted_slots = (before.GetRemovedSlots(after)).ToSlotRanges();
deleted_slots.insert(deleted_slots.end(), out_migrations_slots.begin(), deleted_slots.Merge(out_migrations_slots);
out_migrations_slots.end());
LOG_IF(INFO, !deleted_slots.empty())
<< "Flushing newly unowned slots: " << SlotRange::ToString(deleted_slots);
DeleteSlots(deleted_slots); DeleteSlots(deleted_slots);
LOG_IF(INFO, !deleted_slots.Empty())
<< "Flushing newly unowned slots: " << deleted_slots.ToString();
WriteFlushSlotsToJournal(deleted_slots); WriteFlushSlotsToJournal(deleted_slots);
} }
@ -619,7 +616,7 @@ void ClusterFamily::DflyClusterGetSlotInfo(CmdArgList args, ConnectionContext* c
} }
void ClusterFamily::DflyClusterFlushSlots(CmdArgList args, ConnectionContext* cntx) { void ClusterFamily::DflyClusterFlushSlots(CmdArgList args, ConnectionContext* cntx) {
SlotRanges slot_ranges; std::vector<SlotRange> slot_ranges;
CmdArgParser parser(args); CmdArgParser parser(args);
do { do {
@ -630,7 +627,7 @@ void ClusterFamily::DflyClusterFlushSlots(CmdArgList args, ConnectionContext* cn
if (auto err = parser.Error(); err) if (auto err = parser.Error(); err)
return cntx->SendError(err->MakeReply()); return cntx->SendError(err->MakeReply());
DeleteSlots(slot_ranges); DeleteSlots(SlotRanges(std::move(slot_ranges)));
return cntx->SendOk(); return cntx->SendOk();
} }
@ -692,8 +689,8 @@ void ClusterFamily::DflySlotMigrationStatus(CmdArgList args, ConnectionContext*
m->GetErrorStr()); m->GetErrorStr());
} }
for (const auto& m : outgoing_migration_jobs_) { for (const auto& m : outgoing_migration_jobs_) {
append_answer("out", m->GetMigrationInfo().node_id, node_id, m->GetState(), m->GetKeyCount(), append_answer("out", m->GetMigrationInfo().node_info.id, node_id, m->GetState(),
m->GetErrorStr()); m->GetKeyCount(), m->GetErrorStr());
} }
if (reply.empty()) { if (reply.empty()) {
@ -741,14 +738,14 @@ SlotRanges ClusterFamily::RemoveOutgoingMigrations(shared_ptr<ClusterConfig> new
auto it = std::find_if(outgoing_migration_jobs_.begin(), outgoing_migration_jobs_.end(), auto it = std::find_if(outgoing_migration_jobs_.begin(), outgoing_migration_jobs_.end(),
[&m](const auto& om) { [&m](const auto& om) {
// we can have only one migration per target-source pair // we can have only one migration per target-source pair
return m.node_id == om->GetMigrationInfo().node_id; return m.node_info.id == om->GetMigrationInfo().node_info.id;
}); });
DCHECK(it != outgoing_migration_jobs_.end()); DCHECK(it != outgoing_migration_jobs_.end());
DCHECK(it->get() != nullptr); DCHECK(it->get() != nullptr);
OutgoingMigration& migration = *it->get(); OutgoingMigration& migration = *it->get();
const auto& slots = migration.GetSlots(); const auto& slots = migration.GetSlots();
removed_slots.insert(removed_slots.end(), slots.begin(), slots.end()); removed_slots.Merge(slots);
LOG(INFO) << "Outgoing migration cancelled: slots " << SlotRange::ToString(slots) << " to " LOG(INFO) << "Outgoing migration cancelled: slots " << slots.ToString() << " to "
<< migration.GetHostIp() << ":" << migration.GetPort(); << migration.GetHostIp() << ":" << migration.GetPort();
migration.Finish(); migration.Finish();
outgoing_migration_jobs_.erase(it); outgoing_migration_jobs_.erase(it);
@ -789,7 +786,7 @@ bool RemoveIncomingMigrationImpl(std::vector<std::shared_ptr<IncomingSlotMigrati
auto removed_ranges = removed.ToSlotRanges(); auto removed_ranges = removed.ToSlotRanges();
LOG_IF(WARNING, migration->GetState() == MigrationState::C_FINISHED) LOG_IF(WARNING, migration->GetState() == MigrationState::C_FINISHED)
<< "Flushing slots of removed FINISHED migration " << migration->GetSourceID() << "Flushing slots of removed FINISHED migration " << migration->GetSourceID()
<< ", slots: " << SlotRange::ToString(removed_ranges); << ", slots: " << removed_ranges.ToString();
DeleteSlots(removed_ranges); DeleteSlots(removed_ranges);
} }
@ -800,8 +797,8 @@ bool RemoveIncomingMigrationImpl(std::vector<std::shared_ptr<IncomingSlotMigrati
void ClusterFamily::RemoveIncomingMigrations(const std::vector<MigrationInfo>& migrations) { void ClusterFamily::RemoveIncomingMigrations(const std::vector<MigrationInfo>& migrations) {
lock_guard lk(migration_mu_); lock_guard lk(migration_mu_);
for (const auto& m : migrations) { for (const auto& m : migrations) {
RemoveIncomingMigrationImpl(incoming_migrations_jobs_, m.node_id); RemoveIncomingMigrationImpl(incoming_migrations_jobs_, m.node_info.id);
VLOG(1) << "Migration was canceled from: " << m.node_id; VLOG(1) << "Migration was canceled from: " << m.node_info.id;
} }
} }
@ -811,7 +808,7 @@ void ClusterFamily::InitMigration(CmdArgList args, ConnectionContext* cntx) {
auto [source_id, flows_num] = parser.Next<std::string, uint32_t>(); auto [source_id, flows_num] = parser.Next<std::string, uint32_t>();
SlotRanges slots; std::vector<SlotRange> slots;
do { do {
auto [slot_start, slot_end] = parser.Next<SlotId, SlotId>(); auto [slot_start, slot_end] = parser.Next<SlotId, SlotId>();
slots.emplace_back(SlotRange{slot_start, slot_end}); slots.emplace_back(SlotRange{slot_start, slot_end});
@ -824,7 +821,7 @@ void ClusterFamily::InitMigration(CmdArgList args, ConnectionContext* cntx) {
bool found = any_of(incoming_migrations.begin(), incoming_migrations.end(), bool found = any_of(incoming_migrations.begin(), incoming_migrations.end(),
[&](const MigrationInfo& info) { [&](const MigrationInfo& info) {
// TODO: also compare slot ranges (in an order-agnostic way) // TODO: also compare slot ranges (in an order-agnostic way)
return info.node_id == source_id; return info.node_info.id == source_id;
}); });
if (!found) { if (!found) {
VLOG(1) << "Unrecognized incoming migration from " << source_id; VLOG(1) << "Unrecognized incoming migration from " << source_id;
@ -838,7 +835,7 @@ void ClusterFamily::InitMigration(CmdArgList args, ConnectionContext* cntx) {
LOG_IF(WARNING, was_removed) << "Reinit issued for migration from:" << source_id; LOG_IF(WARNING, was_removed) << "Reinit issued for migration from:" << source_id;
incoming_migrations_jobs_.emplace_back(make_shared<IncomingSlotMigration>( incoming_migrations_jobs_.emplace_back(make_shared<IncomingSlotMigration>(
std::move(source_id), &server_family_->service(), std::move(slots), flows_num)); std::move(source_id), &server_family_->service(), SlotRanges(std::move(slots)), flows_num));
return cntx->SendOk(); return cntx->SendOk();
} }
@ -881,8 +878,11 @@ void ClusterFamily::DflyMigrateFlow(CmdArgList args, ConnectionContext* cntx) {
void ClusterFamily::ApplyMigrationSlotRangeToConfig(std::string_view node_id, void ClusterFamily::ApplyMigrationSlotRangeToConfig(std::string_view node_id,
const SlotRanges& slots, bool is_incoming) { const SlotRanges& slots, bool is_incoming) {
VLOG(1) << "Update config for slots ranges: " << slots.ToString() << " for " << MyID() << " : "
<< node_id;
lock_guard gu(set_config_mu); lock_guard gu(set_config_mu);
lock_guard lk(migration_mu_); lock_guard lk(migration_mu_);
bool is_migration_valid = false; bool is_migration_valid = false;
if (is_incoming) { if (is_incoming) {
for (const auto& mj : incoming_migrations_jobs_) { for (const auto& mj : incoming_migrations_jobs_) {
@ -893,14 +893,17 @@ void ClusterFamily::ApplyMigrationSlotRangeToConfig(std::string_view node_id,
} }
} else { } else {
for (const auto& mj : outgoing_migration_jobs_) { for (const auto& mj : outgoing_migration_jobs_) {
if (mj->GetMigrationInfo().node_id == node_id) { if (mj->GetMigrationInfo().node_info.id == node_id) {
// TODO add compare for slots // TODO add compare for slots
is_migration_valid = true; is_migration_valid = true;
} }
} }
} }
if (!is_migration_valid) if (!is_migration_valid) {
LOG(WARNING) << "Config wasn't updated for slots ranges: " << slots.ToString() << " for "
<< MyID() << " : " << node_id;
return; return;
}
auto new_config = is_incoming ? tl_cluster_config->CloneWithChanges(slots, {}) auto new_config = is_incoming ? tl_cluster_config->CloneWithChanges(slots, {})
: tl_cluster_config->CloneWithChanges({}, slots); : tl_cluster_config->CloneWithChanges({}, slots);
@ -910,6 +913,8 @@ void ClusterFamily::ApplyMigrationSlotRangeToConfig(std::string_view node_id,
server_family_->service().proactor_pool().AwaitFiberOnAll( server_family_->service().proactor_pool().AwaitFiberOnAll(
[&new_config](util::ProactorBase*) { tl_cluster_config = new_config; }); [&new_config](util::ProactorBase*) { tl_cluster_config = new_config; });
DCHECK(tl_cluster_config != nullptr); DCHECK(tl_cluster_config != nullptr);
VLOG(1) << "Config is updated for slots ranges: " << slots.ToString() << " for " << MyID()
<< " : " << node_id;
} }
void ClusterFamily::DflyMigrateAck(CmdArgList args, ConnectionContext* cntx) { void ClusterFamily::DflyMigrateAck(CmdArgList args, ConnectionContext* cntx) {
@ -923,7 +928,7 @@ void ClusterFamily::DflyMigrateAck(CmdArgList args, ConnectionContext* cntx) {
VLOG(1) << "DFLYMIGRATE ACK" << args; VLOG(1) << "DFLYMIGRATE ACK" << args;
auto in_migrations = tl_cluster_config->GetIncomingMigrations(); auto in_migrations = tl_cluster_config->GetIncomingMigrations();
auto m_it = std::find_if(in_migrations.begin(), in_migrations.end(), auto m_it = std::find_if(in_migrations.begin(), in_migrations.end(),
[source_id](const auto& m) { return m.node_id == source_id; }); [source_id](const auto& m) { return m.node_info.id == source_id; });
if (m_it == in_migrations.end()) { if (m_it == in_migrations.end()) {
LOG(WARNING) << "migration isn't in config"; LOG(WARNING) << "migration isn't in config";
return cntx->SendLong(OutgoingMigration::kInvalidAttempt); return cntx->SendLong(OutgoingMigration::kInvalidAttempt);
@ -937,10 +942,7 @@ void ClusterFamily::DflyMigrateAck(CmdArgList args, ConnectionContext* cntx) {
return cntx->SendError("Join timeout happened"); return cntx->SendError("Join timeout happened");
} }
VLOG(1) << "Migration is joined for " << source_id;
ApplyMigrationSlotRangeToConfig(migration->GetSourceID(), migration->GetSlots(), true); ApplyMigrationSlotRangeToConfig(migration->GetSourceID(), migration->GetSlots(), true);
VLOG(1) << "Config is updated for " << MyID();
return cntx->SendLong(attempt); return cntx->SendLong(attempt);
} }

View file

@ -144,7 +144,7 @@ bool IncomingSlotMigration::Join() {
void IncomingSlotMigration::Stop() { void IncomingSlotMigration::Stop() {
string_view log_state = state_.load() == MigrationState::C_FINISHED ? "Finishing" : "Cancelling"; string_view log_state = state_.load() == MigrationState::C_FINISHED ? "Finishing" : "Cancelling";
LOG(INFO) << log_state << " incoming migration of slots " << SlotRange::ToString(slots_); LOG(INFO) << log_state << " incoming migration of slots " << slots_.ToString();
cntx_.Cancel(); cntx_.Cancel();
for (auto& flow : shard_flows_) { for (auto& flow : shard_flows_) {
@ -156,11 +156,10 @@ void IncomingSlotMigration::Stop() {
} }
void IncomingSlotMigration::StartFlow(uint32_t shard, util::FiberSocketBase* source) { void IncomingSlotMigration::StartFlow(uint32_t shard, util::FiberSocketBase* source) {
VLOG(1) << "Start flow for shard: " << shard;
state_.store(MigrationState::C_SYNC); state_.store(MigrationState::C_SYNC);
shard_flows_[shard]->Start(&cntx_, source, bc_); shard_flows_[shard]->Start(&cntx_, source, bc_);
VLOG(1) << "Incoming slot migration flow for shard: " << shard << " finished"; VLOG(1) << "Incoming flow: " << shard << " finished for " << source_id_;
} }
size_t IncomingSlotMigration::GetKeyCount() const { size_t IncomingSlotMigration::GetKeyCount() const {

View file

@ -78,7 +78,7 @@ class OutgoingMigration::SliceSlotMigration : private ProtocolClient {
}; };
OutgoingMigration::OutgoingMigration(MigrationInfo info, ClusterFamily* cf, ServerFamily* sf) OutgoingMigration::OutgoingMigration(MigrationInfo info, ClusterFamily* cf, ServerFamily* sf)
: ProtocolClient(info.ip, info.port), : ProtocolClient(info.node_info.ip, info.node_info.port),
migration_info_(std::move(info)), migration_info_(std::move(info)),
slot_migrations_(shard_set->size()), slot_migrations_(shard_set->size()),
server_family_(sf), server_family_(sf),
@ -107,6 +107,8 @@ bool OutgoingMigration::ChangeState(MigrationState new_state) {
} }
void OutgoingMigration::Finish(bool is_error) { void OutgoingMigration::Finish(bool is_error) {
VLOG(1) << "Finish outgoing migration for " << cf_->MyID() << " : "
<< migration_info_.node_info.id;
bool should_cancel_flows = false; bool should_cancel_flows = false;
{ {
@ -217,20 +219,18 @@ void OutgoingMigration::SyncFb() {
}); });
if (CheckFlowsForErrors()) { if (CheckFlowsForErrors()) {
VLOG(1) << "Errors detected, retrying outgoing migration"; LOG(WARNING) << "Errors detected, retrying outgoing migration";
continue; continue;
} }
VLOG(2) << "Migrations snapshot is finished";
long attempt = 0; long attempt = 0;
while (GetState() != MigrationState::C_FINISHED && !FinalizeMigration(++attempt)) { while (GetState() != MigrationState::C_FINISHED && !FinalizeMigration(++attempt)) {
// process commands that were on pause and try again // process commands that were on pause and try again
VLOG(2) << "Waiting for migration to finalize..."; VLOG(1) << "Waiting for migration to finalize...";
ThisFiber::SleepFor(500ms); ThisFiber::SleepFor(500ms);
} }
if (CheckFlowsForErrors()) { if (CheckFlowsForErrors()) {
VLOG(1) << "Errors detected, retrying outgoing migration"; LOG(WARNING) << "Errors detected, retrying outgoing migration";
continue; continue;
} }
break; break;
@ -242,6 +242,7 @@ void OutgoingMigration::SyncFb() {
bool OutgoingMigration::FinalizeMigration(long attempt) { bool OutgoingMigration::FinalizeMigration(long attempt) {
// if it's not the 1st attempt and flows work correctly we try to reconnect and ACK one more // if it's not the 1st attempt and flows work correctly we try to reconnect and ACK one more
// time // time
VLOG(1) << "FinalizeMigration for " << cf_->MyID() << " : " << migration_info_.node_info.id;
if (attempt > 1) { if (attempt > 1) {
if (CheckFlowsForErrors()) { if (CheckFlowsForErrors()) {
Finish(true); Finish(true);
@ -271,11 +272,11 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
auto cb = [this](util::ProactorBase* pb) { auto cb = [this](util::ProactorBase* pb) {
if (const auto* shard = EngineShard::tlocal(); shard) { if (const auto* shard = EngineShard::tlocal(); shard) {
VLOG(1) << "FINALIZE outgoing migration" << shard->shard_id();
slot_migrations_[shard->shard_id()]->Finalize(); slot_migrations_[shard->shard_id()]->Finalize();
} }
}; };
VLOG(1) << "FINALIZE flows for " << cf_->MyID() << " : " << migration_info_.node_info.id;
shard_set->pool()->AwaitFiberOnAll(std::move(cb)); shard_set->pool()->AwaitFiberOnAll(std::move(cb));
auto cmd = absl::StrCat("DFLYMIGRATE ACK ", cf_->MyID(), " ", attempt); auto cmd = absl::StrCat("DFLYMIGRATE ACK ", cf_->MyID(), " ", attempt);
@ -309,9 +310,8 @@ bool OutgoingMigration::FinalizeMigration(long attempt) {
Finish(is_error); Finish(is_error);
if (!is_error) { if (!is_error) {
keys_number_ = cluster::GetKeyCount(migration_info_.slot_ranges); keys_number_ = cluster::GetKeyCount(migration_info_.slot_ranges);
cf_->ApplyMigrationSlotRangeToConfig(migration_info_.node_id, migration_info_.slot_ranges, cf_->ApplyMigrationSlotRangeToConfig(migration_info_.node_info.id, migration_info_.slot_ranges,
false); false);
VLOG(1) << "Config is updated for " << cf_->MyID();
} }
return true; return true;
} }

View file

@ -68,7 +68,7 @@ class SlotSet {
} }
SlotRanges ToSlotRanges() const { SlotRanges ToSlotRanges() const {
SlotRanges res; std::vector<SlotRange> res;
for (SlotId i = 0; i < kSlotsNumber; ++i) { for (SlotId i = 0; i < kSlotsNumber; ++i) {
if (!slots_->test(i)) { if (!slots_->test(i)) {
@ -81,7 +81,7 @@ class SlotSet {
} }
} }
return res; return SlotRanges(res);
} }
private: private:

View file

@ -1041,7 +1041,7 @@ async def test_cluster_flushall_during_migration(
df_factory.create( df_factory.create(
port=BASE_PORT + i, port=BASE_PORT + i,
admin_port=BASE_PORT + i + 1000, admin_port=BASE_PORT + i + 1000,
vmodule="cluster_family=9,cluster_slot_migration=9,outgoing_slot_migration=9", vmodule="cluster_family=9,cluster_slot_migration=9,outgoing_slot_migration=9,incoming_slot_migration=9",
logtostdout=True, logtostdout=True,
) )
for i in range(2) for i in range(2)
@ -1265,7 +1265,7 @@ async def test_cluster_fuzzymigration(
df_factory.create( df_factory.create(
port=BASE_PORT + i, port=BASE_PORT + i,
admin_port=BASE_PORT + i + 1000, admin_port=BASE_PORT + i + 1000,
vmodule="cluster_family=9,cluster_slot_migration=9", vmodule="outgoing_slot_migration=9,cluster_family=9,incoming_slot_migration=9",
) )
for i in range(node_count) for i in range(node_count)
] ]
@ -1292,9 +1292,6 @@ async def test_cluster_fuzzymigration(
await client.lpush(key, i) await client.lpush(key, i)
except asyncio.exceptions.CancelledError: except asyncio.exceptions.CancelledError:
return return
# TODO find the reason of TTL exhausted error and is it possible to fix it
except redis.exceptions.ClusterError:
return
# Start ten counters # Start ten counters
counter_keys = [f"_counter{i}" for i in range(10)] counter_keys = [f"_counter{i}" for i in range(10)]
@ -1347,7 +1344,8 @@ async def test_cluster_fuzzymigration(
res = True res = True
for node in nodes: for node in nodes:
states = await node.admin_client.execute_command("DFLYCLUSTER", "SLOT-MIGRATION-STATUS") states = await node.admin_client.execute_command("DFLYCLUSTER", "SLOT-MIGRATION-STATUS")
logging.debug(states) if states != "NO_STATE":
logging.debug(states)
for state in states: for state in states:
parsed_state = re.search("([a-z]+) ([a-z0-9]+) ([A-Z]+)", state) parsed_state = re.search("([a-z]+) ([a-z0-9]+) ([A-Z]+)", state)
if parsed_state == None: if parsed_state == None: