Status TwoDimensionalGreedyAlgo::GetNextMove()

in src/kudu/tools/rebalance_algo.cc [204:338]


Status TwoDimensionalGreedyAlgo::GetNextMove(
    const ClusterBalanceInfo& cluster_info,
    boost::optional<TableReplicaMove>* move) {
  DCHECK(move);
  // Set the output to none: this fits the short-circuit cases when there is
  // an issue with the parameters or there aren't any moves to return.
  *move = boost::none;

  // Due to the nature of the table_info_by_skew container, the very last
  // range represents the most unbalanced tables.
  const auto& table_info_by_skew = cluster_info.table_info_by_skew;
  if (table_info_by_skew.empty()) {
    return Status::InvalidArgument("no table balance information");
  }
  const auto max_table_skew = table_info_by_skew.rbegin()->first;

  const auto& servers_by_total_replica_count =
      cluster_info.servers_by_total_replica_count;
  if (servers_by_total_replica_count.empty()) {
    return Status::InvalidArgument("no per-server replica count information");
  }
  const auto max_server_skew =
      servers_by_total_replica_count.rbegin()->first -
      servers_by_total_replica_count.begin()->first;

  if (max_table_skew == 0) {
    // Every table is balanced and any move will unbalance a table, so there
    // is no potential for the greedy algorithm to balance the cluster.
    return Status::OK();
  }
  if (max_table_skew <= 1 && max_server_skew <= 1) {
    // Every table is balanced and the cluster as a whole is balanced.
    return Status::OK();
  }

  // Among the tables with maximum skew, attempt to pick a table where there is
  // a move that improves the table skew and the cluster skew, if possible. If
  // not, attempt to pick a move that improves the table skew. If all tables
  // are balanced, attempt to pick a move that preserves table balance and
  // improves cluster skew.
  const auto range = table_info_by_skew.equal_range(max_table_skew);
  for (auto it = range.first; it != range.second; ++it) {
    const TableBalanceInfo& tbi = it->second;
    const auto& servers_by_table_replica_count = tbi.servers_by_replica_count;
    if (servers_by_table_replica_count.empty()) {
      return Status::InvalidArgument(Substitute(
          "no information on replicas of table $0", tbi.table_id));
    }

    const auto min_replica_count = servers_by_table_replica_count.begin()->first;
    const auto max_replica_count = servers_by_table_replica_count.rbegin()->first;
    VLOG(1) << Substitute(
        "balancing table $0 with replica count skew $1 "
        "(min_replica_count: $2, max_replica_count: $3)",
        tbi.table_id, table_info_by_skew.rbegin()->first,
        min_replica_count, max_replica_count);

    // Compute the intersection of the tablet servers most loaded for the table
    // with the tablet servers most loaded overall, and likewise for least loaded.
    // These are our ideal candidates for moving from and to, respectively.
    int32_t max_count_table;
    int32_t max_count_total;
    vector<string> max_loaded;
    vector<string> max_loaded_intersection;
    RETURN_NOT_OK(GetIntersection(
        ExtremumType::MAX,
        servers_by_table_replica_count, servers_by_total_replica_count,
        &max_count_table, &max_count_total,
        &max_loaded, &max_loaded_intersection));
    int32_t min_count_table;
    int32_t min_count_total;
    vector<string> min_loaded;
    vector<string> min_loaded_intersection;
    RETURN_NOT_OK(GetIntersection(
        ExtremumType::MIN,
        servers_by_table_replica_count, servers_by_total_replica_count,
        &min_count_table, &min_count_total,
        &min_loaded, &min_loaded_intersection));

    VLOG(1) << Substitute("table-wise  : min_count: $0, max_count: $1",
                          min_count_table, max_count_table);
    VLOG(1) << Substitute("cluster-wise: min_count: $0, max_count: $1",
                          min_count_total, max_count_total);
    if (PREDICT_FALSE(VLOG_IS_ON(1))) {
      ostringstream s;
      s << "[ ";
      for (const auto& e : max_loaded_intersection) {
        s << e << " ";
      }
      s << "]";
      VLOG(1) << "max_loaded_intersection: " << s.str();

      s.str("");
      s << "[ ";
      for (const auto& e : min_loaded_intersection) {
        s << e << " ";
      }
      s << "]";
      VLOG(1) << "min_loaded_intersection: " << s.str();
    }
    // Do not move replicas of a balanced table if the least (most) loaded
    // servers overall do not intersect the servers hosting the least (most)
    // replicas of the table. Moving a replica in that case might keep the
    // cluster skew the same or make it worse while keeping the table balanced.
    if ((max_count_table <= min_count_table + 1) &&
        (min_loaded_intersection.empty() || max_loaded_intersection.empty())) {
      continue;
    }
    if (equal_skew_opt_ == EqualSkewOption::PICK_RANDOM) {
      shuffle(min_loaded.begin(), min_loaded.end(), generator_);
      shuffle(min_loaded_intersection.begin(), min_loaded_intersection.end(),
              generator_);
      shuffle(max_loaded.begin(), max_loaded.end(), generator_);
      shuffle(max_loaded_intersection.begin(), max_loaded_intersection.end(),
              generator_);
    }
    const auto& min_loaded_uuid = min_loaded_intersection.empty()
        ? min_loaded.front() : min_loaded_intersection.front();
    const auto& max_loaded_uuid = max_loaded_intersection.empty()
        ? max_loaded.back() : max_loaded_intersection.back();
    VLOG(1) << Substitute("min_loaded_uuid: $0, max_loaded_uuid: $1",
                          min_loaded_uuid, max_loaded_uuid);
    if (min_loaded_uuid == max_loaded_uuid) {
      // Nothing to move.
      continue;
    }

    // Move a replica of the selected table from a most loaded server to a
    // least loaded server.
    *move = { tbi.table_id, max_loaded_uuid, min_loaded_uuid };
    break;
  }

  return Status::OK();
}