void LinearScanAllocator::calculateLiveIntervals()

in Jit/lir/regalloc.cpp [226:468]


void LinearScanAllocator::calculateLiveIntervals() {
  const auto& basic_blocks = func_->basicblocks();

  // This table maps loop headers to all their loop ends. A loop end basic
  // block is the last block of a loop starting at the loop header.
  // The key is the pointer to the loop header and the value std::vector<int>
  // is a vector of the block ids of all the associated loop ends.
  UnorderedMap<const BasicBlock*, std::vector<int>> loop_ends;

#ifdef Py_DEBUG
  UnorderedSet<const Operand*> seen_outputs;
#endif

  int total_instrs = 0;
  for (auto& bb : basic_blocks) {
    total_instrs += bb->getNumInstrs();
  }
  int total_ids = total_instrs * 2 + basic_blocks.size();

  UnorderedSet<const BasicBlock*> visited_blocks;
  for (auto iter = basic_blocks.rbegin(); iter != basic_blocks.rend(); ++iter) {
    BasicBlock* bb = *iter;

    // bb_start_id and bb_end_id do not point to any instructions.
    // each instrution is associated to two ids, where the first id
    // if for using its inputs, and the second id is for defining
    // its output.

    // Basic block M
    // x   <- bb_start_id
    // x + 1  instructions1
    // x + 2
    // x + 3  instructions2
    // x + 4
    // ...
    // x + 2N - 1  instructionsN
    // x + 2N
    // basic block M + 1
    // x + 2N + 1   <- bb_end_id of bb M, bb_start_id of block M + 1
    constexpr int kIdsPerInstr = 2;
    auto bb_end_id = total_ids;
    auto bb_instrs = bb->getNumInstrs() * kIdsPerInstr;
    total_ids -= bb_instrs;
    total_ids--;
    auto bb_start_id = total_ids;

    auto lir_bb_iter =
        regalloc_blocks_
            .emplace(
                std::piecewise_construct,
                std::forward_as_tuple(bb),
                std::forward_as_tuple(bb, bb_start_id, bb->getFirstInstr()))
            .first;

    auto& successors = bb->successors();

    UnorderedSet<const Operand*> live;

    for (auto succ : successors) {
      // each successor's livein is live
      auto live_iter = regalloc_blocks_.find(succ);
      if (live_iter != regalloc_blocks_.end()) {
        auto& livein = live_iter->second.livein;
        live.insert(livein.begin(), livein.end());
      }

      // each successor's phi inputs are live
      succ->foreachPhiInstr([&bb, &live](const Instruction* instr) {
        auto opnd = instr->getOperandByPredecessor(bb)->getDefine();
        live.insert(opnd);
      });
    }

    for (auto live_opnd : live) {
      getIntervalByVReg(live_opnd).addRange({bb_start_id, bb_end_id});
    }

    int instr_id = bb_end_id - kIdsPerInstr;
    auto& instrs = bb->instructions();
    for (auto instr_iter = instrs.rbegin(); instr_iter != instrs.rend();
         ++instr_iter, instr_id -= kIdsPerInstr) {
      auto instr = instr_iter->get();
      auto instr_opcode = instr->opcode();
      if (instr_opcode == Instruction::kPhi) {
        // ignore phi instructions
        continue;
      }

      // output
      auto output_opnd = instr->output();
      if (output_opnd->isVreg()) {
#ifdef Py_DEBUG
        auto inserted = seen_outputs.insert(output_opnd).second;
        JIT_DCHECK(inserted, "LIR is not in SSA form");
#endif
        getIntervalByVReg(output_opnd).setFrom(instr_id + 1);
        live.erase(output_opnd);

        if (instr->getOutputPhyRegUse()) {
          vreg_phy_uses_[output_opnd].emplace(instr_id + 1);
        }
      }

      auto register_input = [&](const OperandBase* operand, bool reguse) {
        auto def = operand->getDefine();

        auto pair = vreg_interval_.emplace(def, def);

        int range_end = operand->instr()->inputsLiveAcross()
            ? instr_id + kIdsPerInstr
            : instr_id + 1;
        pair.first->second.addRange({bb_start_id, range_end});

        // if the def is not live before, record the last use
        if (!live.count(def) && operand->isLinked()) {
          vreg_last_use_[def].emplace(
              static_cast<const LinkedOperand*>(operand), instr_id);
        }

        live.insert(def);
        if (reguse) {
          vreg_phy_uses_[def].emplace(instr_id);
        }
      };

      auto visit_indirect = [&](const OperandBase* operand) {
        auto indirect = operand->getMemoryIndirect();
        auto base = indirect->getBaseRegOperand();
        if (base->isVreg()) {
          register_input(base, true);
        }

        auto index = indirect->getIndexRegOperand();
        if (index != nullptr && index->isVreg()) {
          register_input(index, true);
        }
      };

      // if output is a memory indirect, the base and index registers
      // should be considered as inputs.
      if (output_opnd->isInd()) {
        visit_indirect(output_opnd);
      }

      // inputs
      for (size_t i = 0; i < instr->getNumInputs(); i++) {
        const OperandBase* opnd = instr->getInput(i);
        if (!opnd->isVreg() && !opnd->isInd()) {
          continue;
        }

        if (opnd->isInd()) {
          visit_indirect(opnd);
          continue;
        }

        register_input(opnd, instr->getInputPhyRegUse(i));
      }

      if (instr_opcode == Instruction::kCall ||
          instr_opcode == Instruction::kVectorCall) {
        reserveCallerSaveRegisters(instr_id);
      }

      if ((instr_opcode == Instruction::kMul) &&
          (instr->getInput(0)->dataType() == OperandBase::k8bit)) {
        // see rewriteByteMultiply
        reserveRegisters(instr_id, PhyRegisterSet(PhyLocation::RAX));
      } else if (
          instr_opcode == Instruction::kDiv ||
          instr_opcode == Instruction::kDivUn) {
        PhyRegisterSet reserved(PhyLocation::RAX);

        if (instr->getInput(1)->dataType() != OperandBase::k8bit) {
          reserved = reserved | PhyLocation::RDX;
        }

        reserveRegisters(instr_id, reserved);
      }

      if (instr->isAnyYield()) {
        spillRegistersForYield(instr_id);
      }

      if (instr_opcode == Instruction::kBind) {
        auto& interval = getIntervalByVReg(instr->output());
        interval.allocateTo(instr->getInput(0)->getPhyRegister());
      }
    }
    // From the original paper:
    /*
       Phi functions are not processed during this iteration of operations,
       instead they are iterated separately. Because the live range of a phi
       function starts at the beginning of the block, it is not necessary to
       shorten the range for its output operand. The operand is only removed
       from the set of live registers. The input operands of the phi function
       are not handled here, because this is done independently when the
       different predecessors are processed. Thus, neither an input operand nor
       the output operand of a phi function is live at the beginning of the phi
       function's block.
    */
    bb->foreachPhiInstr(
        [&live](const Instruction* phi) { live.erase(phi->output()); });

    auto loop_iter = loop_ends.find(bb);
    if (loop_iter != loop_ends.end()) {
      for (auto& loop_end_id : loop_iter->second) {
        for (auto& opnd : live) {
          LiveRange loop_range(bb_start_id, loop_end_id);
          getIntervalByVReg(opnd).addRange(loop_range);
          // if the last use is in a loop, it is not a real last use
          auto opnd_iter = vreg_last_use_.find(opnd);
          if (opnd_iter == vreg_last_use_.end()) {
            continue;
          }
          auto& uses = opnd_iter->second;
          for (auto use_iter = uses.begin(); use_iter != uses.end();) {
            LIRLocation use_loc = use_iter->second;
            if (loop_range.isInRange(use_loc)) {
              use_iter = uses.erase(use_iter);
              continue;
            }

            ++use_iter;
          }
        }
      }
    }

    lir_bb_iter->second.livein = std::move(live);

    // record a loop end
    for (auto& succ : bb->successors()) {
      if (visited_blocks.count(bb)) {
        continue;
      }

      loop_ends[succ].push_back(bb_start_id + bb_instrs + 1);
    }

    visited_blocks.insert(bb);
  }
}