in src/kudu/tools/rebalance_algo.cc [204:338]
Status TwoDimensionalGreedyAlgo::GetNextMove(
const ClusterBalanceInfo& cluster_info,
boost::optional<TableReplicaMove>* move) {
DCHECK(move);
// Set the output to none: this fits the short-circuit cases when there is
// an issue with the parameters or there aren't any moves to return.
*move = boost::none;
// Due to the nature of the table_info_by_skew container, the very last
// range represents the most unbalanced tables.
const auto& table_info_by_skew = cluster_info.table_info_by_skew;
if (table_info_by_skew.empty()) {
return Status::InvalidArgument("no table balance information");
}
const auto max_table_skew = table_info_by_skew.rbegin()->first;
const auto& servers_by_total_replica_count =
cluster_info.servers_by_total_replica_count;
if (servers_by_total_replica_count.empty()) {
return Status::InvalidArgument("no per-server replica count information");
}
const auto max_server_skew =
servers_by_total_replica_count.rbegin()->first -
servers_by_total_replica_count.begin()->first;
if (max_table_skew == 0) {
// Every table is balanced and any move will unbalance a table, so there
// is no potential for the greedy algorithm to balance the cluster.
return Status::OK();
}
if (max_table_skew <= 1 && max_server_skew <= 1) {
// Every table is balanced and the cluster as a whole is balanced.
return Status::OK();
}
// Among the tables with maximum skew, attempt to pick a table where there is
// a move that improves the table skew and the cluster skew, if possible. If
// not, attempt to pick a move that improves the table skew. If all tables
// are balanced, attempt to pick a move that preserves table balance and
// improves cluster skew.
const auto range = table_info_by_skew.equal_range(max_table_skew);
for (auto it = range.first; it != range.second; ++it) {
const TableBalanceInfo& tbi = it->second;
const auto& servers_by_table_replica_count = tbi.servers_by_replica_count;
if (servers_by_table_replica_count.empty()) {
return Status::InvalidArgument(Substitute(
"no information on replicas of table $0", tbi.table_id));
}
const auto min_replica_count = servers_by_table_replica_count.begin()->first;
const auto max_replica_count = servers_by_table_replica_count.rbegin()->first;
VLOG(1) << Substitute(
"balancing table $0 with replica count skew $1 "
"(min_replica_count: $2, max_replica_count: $3)",
tbi.table_id, table_info_by_skew.rbegin()->first,
min_replica_count, max_replica_count);
// Compute the intersection of the tablet servers most loaded for the table
// with the tablet servers most loaded overall, and likewise for least loaded.
// These are our ideal candidates for moving from and to, respectively.
int32_t max_count_table;
int32_t max_count_total;
vector<string> max_loaded;
vector<string> max_loaded_intersection;
RETURN_NOT_OK(GetIntersection(
ExtremumType::MAX,
servers_by_table_replica_count, servers_by_total_replica_count,
&max_count_table, &max_count_total,
&max_loaded, &max_loaded_intersection));
int32_t min_count_table;
int32_t min_count_total;
vector<string> min_loaded;
vector<string> min_loaded_intersection;
RETURN_NOT_OK(GetIntersection(
ExtremumType::MIN,
servers_by_table_replica_count, servers_by_total_replica_count,
&min_count_table, &min_count_total,
&min_loaded, &min_loaded_intersection));
VLOG(1) << Substitute("table-wise : min_count: $0, max_count: $1",
min_count_table, max_count_table);
VLOG(1) << Substitute("cluster-wise: min_count: $0, max_count: $1",
min_count_total, max_count_total);
if (PREDICT_FALSE(VLOG_IS_ON(1))) {
ostringstream s;
s << "[ ";
for (const auto& e : max_loaded_intersection) {
s << e << " ";
}
s << "]";
VLOG(1) << "max_loaded_intersection: " << s.str();
s.str("");
s << "[ ";
for (const auto& e : min_loaded_intersection) {
s << e << " ";
}
s << "]";
VLOG(1) << "min_loaded_intersection: " << s.str();
}
// Do not move replicas of a balanced table if the least (most) loaded
// servers overall do not intersect the servers hosting the least (most)
// replicas of the table. Moving a replica in that case might keep the
// cluster skew the same or make it worse while keeping the table balanced.
if ((max_count_table <= min_count_table + 1) &&
(min_loaded_intersection.empty() || max_loaded_intersection.empty())) {
continue;
}
if (equal_skew_opt_ == EqualSkewOption::PICK_RANDOM) {
shuffle(min_loaded.begin(), min_loaded.end(), generator_);
shuffle(min_loaded_intersection.begin(), min_loaded_intersection.end(),
generator_);
shuffle(max_loaded.begin(), max_loaded.end(), generator_);
shuffle(max_loaded_intersection.begin(), max_loaded_intersection.end(),
generator_);
}
const auto& min_loaded_uuid = min_loaded_intersection.empty()
? min_loaded.front() : min_loaded_intersection.front();
const auto& max_loaded_uuid = max_loaded_intersection.empty()
? max_loaded.back() : max_loaded_intersection.back();
VLOG(1) << Substitute("min_loaded_uuid: $0, max_loaded_uuid: $1",
min_loaded_uuid, max_loaded_uuid);
if (min_loaded_uuid == max_loaded_uuid) {
// Nothing to move.
continue;
}
// Move a replica of the selected table from a most loaded server to a
// least loaded server.
*move = { tbi.table_id, max_loaded_uuid, min_loaded_uuid };
break;
}
return Status::OK();
}