protected List balanceTable()

in hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java [565:747]


  protected List<RegionPlan> balanceTable(TableName tableName,
    Map<ServerName, List<RegionInfo>> loadOfOneTable) {
    // On clusters with lots of HFileLinks or lots of reference files,
    // instantiating the storefile infos can be quite expensive.
    // Allow turning this feature off if the locality cost is not going to
    // be used in any computations.
    RegionHDFSBlockLocationFinder finder = null;
    if ((this.localityCost != null) || (this.rackLocalityCost != null)) {
      finder = this.regionFinder;
    }

    // The clusterState that is given to this method contains the state
    // of all the regions in the table(s) (that's true today)
    // Keep track of servers to iterate through them.
    BalancerClusterState cluster = new BalancerClusterState(loadOfOneTable, loads, finder,
      rackManager, regionCacheRatioOnOldServerMap);

    long startTime = EnvironmentEdgeManager.currentTime();
    cluster.setStopRequestedAt(startTime + maxRunningTime);

    initCosts(cluster);
    balancerConditionals.loadClusterState(cluster);
    balancerConditionals.clearConditionalWeightCaches();

    float localSumMultiplier = 0;
    for (CostFunction c : costFunctions) {
      if (c.isNeeded()) {
        localSumMultiplier += c.getMultiplier();
      }
    }
    sumMultiplier = localSumMultiplier;
    if (sumMultiplier <= 0) {
      LOG.error("At least one cost function needs a multiplier > 0. For example, set "
        + "hbase.master.balancer.stochastic.regionCountCost to a positive value or default");
      return null;
    }

    double currentCost = computeCost(cluster, Double.MAX_VALUE);
    curOverallCost = currentCost;
    System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length);
    updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
    double initCost = currentCost;
    double newCost;

    if (!needsBalance(tableName, cluster)) {
      return null;
    }

    long computedMaxSteps;
    if (runMaxSteps) {
      computedMaxSteps = Math.max(this.maxSteps, calculateMaxSteps(cluster));
    } else {
      long calculatedMaxSteps = calculateMaxSteps(cluster);
      computedMaxSteps = Math.min(this.maxSteps, calculatedMaxSteps);
      if (calculatedMaxSteps > maxSteps) {
        LOG.warn(
          "calculatedMaxSteps:{} for loadbalancer's stochastic walk is larger than "
            + "maxSteps:{}. Hence load balancing may not work well. Setting parameter "
            + "\"hbase.master.balancer.stochastic.runMaxSteps\" to true can overcome this issue."
            + "(This config change does not require service restart)",
          calculatedMaxSteps, maxSteps);
      }
    }
    LOG.info(
      "Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, "
        + "functionCost={} computedMaxSteps={}",
      currentCost / sumMultiplier, functionCost(), computedMaxSteps);

    final String initFunctionTotalCosts = totalCostsPerFunc();
    // Perform a stochastic walk to see if we can get a good fit.
    long step;
    boolean planImprovedConditionals = false;
    Map<Class<? extends CandidateGenerator>, Long> generatorToStepCount = new HashMap<>();
    Map<Class<? extends CandidateGenerator>, Long> generatorToApprovedActionCount = new HashMap<>();
    for (step = 0; step < computedMaxSteps; step++) {
      Pair<CandidateGenerator, BalanceAction> nextAction = nextAction(cluster);
      CandidateGenerator generator = nextAction.getFirst();
      BalanceAction action = nextAction.getSecond();

      if (action.getType() == BalanceAction.Type.NULL) {
        continue;
      }

      int conditionalViolationsChange = 0;
      boolean isViolatingConditionals = false;
      boolean moveImprovedConditionals = false;
      // Only check conditionals if they are enabled
      if (balancerConditionals.isConditionalBalancingEnabled()) {
        // Always accept a conditional generator output. Sometimes conditional generators
        // may need to make controversial moves in order to break what would otherwise
        // be a deadlocked situation.
        // Otherwise, for normal moves, evaluate the action.
        if (RegionPlanConditionalCandidateGenerator.class.isAssignableFrom(generator.getClass())) {
          conditionalViolationsChange = -1;
        } else {
          conditionalViolationsChange =
            balancerConditionals.getViolationCountChange(cluster, action);
          isViolatingConditionals = balancerConditionals.isViolating(cluster, action);
        }
        moveImprovedConditionals = conditionalViolationsChange < 0;
        if (moveImprovedConditionals) {
          planImprovedConditionals = true;
        }
      }

      // Change state and evaluate costs
      try {
        cluster.doAction(action);
      } catch (IllegalStateException | ArrayIndexOutOfBoundsException e) {
        LOG.warn(
          "Generator {} produced invalid action! "
            + "Debug your candidate generator as this is likely a bug, "
            + "and may cause a balancer deadlock. {}",
          generator.getClass().getSimpleName(), action, e);
        continue;
      }
      updateCostsAndWeightsWithAction(cluster, action);
      generatorToStepCount.merge(generator.getClass(), action.getStepCount(), Long::sum);

      newCost = computeCost(cluster, currentCost);

      double costImprovement = currentCost - newCost;
      double minimumImprovement =
        Math.max(CostFunction.getCostEpsilon(currentCost), CostFunction.getCostEpsilon(newCost));
      boolean costsImproved = costImprovement > minimumImprovement;
      boolean conditionalsSimilarCostsImproved =
        (costsImproved && conditionalViolationsChange == 0 && !isViolatingConditionals);
      // Our first priority is to reduce conditional violations
      // Our second priority is to reduce balancer cost
      // change, regardless of cost change
      if (moveImprovedConditionals || conditionalsSimilarCostsImproved) {
        currentCost = newCost;
        generatorToApprovedActionCount.merge(generator.getClass(), action.getStepCount(),
          Long::sum);

        // save for JMX
        curOverallCost = currentCost;
        System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length);
      } else {
        // Put things back the way they were before.
        // TODO: undo by remembering old values
        BalanceAction undoAction = action.undoAction();
        cluster.doAction(undoAction);
        updateCostsAndWeightsWithAction(cluster, undoAction);
      }

      if (cluster.isStopRequested()) {
        break;
      }
    }
    long endTime = EnvironmentEdgeManager.currentTime();

    StringJoiner joiner = new StringJoiner("\n");
    joiner.add("CandidateGenerator activity summary:");
    generatorToStepCount.forEach((generator, count) -> {
      long approvals = generatorToApprovedActionCount.getOrDefault(generator, 0L);
      joiner.add(String.format(" - %s: %d steps, %d approvals", generator.getSimpleName(), count,
        approvals));
    });
    LOG.debug(joiner.toString());

    metricsBalancer.balanceCluster(endTime - startTime);

    if (planImprovedConditionals || (initCost > currentCost)) {
      updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
      List<RegionPlan> plans = createRegionPlans(cluster);
      LOG.info(
        "Finished computing new moving plan. Computation took {} ms"
          + " to try {} different iterations.  Found a solution that moves "
          + "{} regions; Going from a computed imbalance of {}"
          + " to a new imbalance of {}. funtionCost={}",
        endTime - startTime, step, plans.size(), initCost / sumMultiplier,
        currentCost / sumMultiplier, functionCost());
      sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step);
      return plans;
    }
    LOG.info(
      "Could not find a better moving plan.  Tried {} different configurations in "
        + "{} ms, and did not find anything with an imbalance score less than {} "
        + "and could not improve conditional violations",
      step, endTime - startTime, initCost / sumMultiplier);
    return null;
  }