Error RALocalAllocator::allocInst()

in erts/emulator/asmjit/core/ralocal.cpp [396:839]


Error RALocalAllocator::allocInst(InstNode* node) noexcept {
  RAInst* raInst = node->passData<RAInst>();

  RATiedReg* outTiedRegs[Globals::kMaxPhysRegs];
  RATiedReg* dupTiedRegs[Globals::kMaxPhysRegs];

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  _node = node;
  _raInst = raInst;
  _tiedTotal = raInst->_tiedTotal;
  _tiedCount = raInst->_tiedCount;

  // Whether we already replaced register operand with memory operand.
  bool rmAllocated = false;

  for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
    uint32_t i, count = this->tiedCount(group);
    RATiedReg* tiedRegs = this->tiedRegs(group);

    uint32_t willUse = _raInst->_usedRegs[group];
    uint32_t willOut = _raInst->_clobberedRegs[group];
    uint32_t willFree = 0;
    uint32_t usePending = count;

    uint32_t outTiedCount = 0;
    uint32_t dupTiedCount = 0;

    // ------------------------------------------------------------------------
    // STEP 1:
    //
    // Calculate `willUse` and `willFree` masks based on tied registers we have.
    //
    // We don't do any assignment decisions at this stage as we just need to
    // collect some information first. Then, after we populate all masks needed
    // we can finally make some decisions in the second loop. The main reason
    // for this is that we really need `willFree` to make assignment decisions
    // for `willUse`, because if we mark some registers that will be freed, we
    // can consider them in decision making afterwards.
    // ------------------------------------------------------------------------

    for (i = 0; i < count; i++) {
      RATiedReg* tiedReg = &tiedRegs[i];

      // Add OUT and KILL to `outPending` for CLOBBERing and/or OUT assignment.
      if (tiedReg->isOutOrKill())
        outTiedRegs[outTiedCount++] = tiedReg;

      if (tiedReg->isDuplicate())
        dupTiedRegs[dupTiedCount++] = tiedReg;

      if (!tiedReg->isUse()) {
        tiedReg->markUseDone();
        usePending--;
        continue;
      }

      uint32_t workId = tiedReg->workId();
      uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

      if (tiedReg->hasUseId()) {
        // If the register has `useId` it means it can only be allocated in that register.
        uint32_t useMask = Support::bitMask(tiedReg->useId());

        // RAInstBuilder must have collected `usedRegs` on-the-fly.
        ASMJIT_ASSERT((willUse & useMask) != 0);

        if (assignedId == tiedReg->useId()) {
          // If the register is already allocated in this one, mark it done and continue.
          tiedReg->markUseDone();
          if (tiedReg->isWrite())
            _curAssignment.makeDirty(group, workId, assignedId);
          usePending--;
          willUse |= useMask;
        }
        else {
          willFree |= useMask & _curAssignment.assigned(group);
        }
      }
      else {
        // Check if the register must be moved to `allocableRegs`.
        uint32_t allocableRegs = tiedReg->allocableRegs();
        if (assignedId != RAAssignment::kPhysNone) {
          uint32_t assignedMask = Support::bitMask(assignedId);
          if ((allocableRegs & ~willUse) & assignedMask) {
            tiedReg->setUseId(assignedId);
            tiedReg->markUseDone();
            if (tiedReg->isWrite())
              _curAssignment.makeDirty(group, workId, assignedId);
            usePending--;
            willUse |= assignedMask;
          }
          else {
            willFree |= assignedMask;
          }
        }
      }
    }

    // ------------------------------------------------------------------------
    // STEP 2:
    //
    // Do some decision making to find the best candidates of registers that
    // need to be assigned, moved, and/or spilled. Only USE registers are
    // considered here, OUT will be decided later after all CLOBBERed and OUT
    // registers are unassigned.
    // ------------------------------------------------------------------------

    if (usePending) {
      // TODO: Not sure `liveRegs` should be used, maybe willUse and willFree would be enough and much more clear.

      // All registers that are currently alive without registers that will be freed.
      uint32_t liveRegs = _curAssignment.assigned(group) & ~willFree;

      for (i = 0; i < count; i++) {
        RATiedReg* tiedReg = &tiedRegs[i];
        if (tiedReg->isUseDone()) continue;

        uint32_t workId = tiedReg->workId();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

        // REG/MEM: Patch register operand to memory operand if not allocated.
        if (!rmAllocated && tiedReg->hasUseRM()) {
          if (assignedId == RAAssignment::kPhysNone && Support::isPowerOf2(tiedReg->useRewriteMask())) {
            RAWorkReg* workReg = workRegById(tiedReg->workId());
            uint32_t opIndex = Support::ctz(tiedReg->useRewriteMask()) / uint32_t(sizeof(Operand) / sizeof(uint32_t));
            uint32_t rmSize = tiedReg->rmSize();

            if (rmSize <= workReg->virtReg()->virtSize()) {
              Operand& op = node->operands()[opIndex];
              op = _pass->workRegAsMem(workReg);
              op.as<BaseMem>().setSize(rmSize);
              tiedReg->_useRewriteMask = 0;

              tiedReg->markUseDone();
              usePending--;

              rmAllocated = true;
              continue;
            }
          }
        }

        if (!tiedReg->hasUseId()) {
          uint32_t allocableRegs = tiedReg->allocableRegs() & ~(willFree | willUse);

          // DECIDE where to assign the USE register.
          uint32_t useId = decideOnAssignment(group, workId, assignedId, allocableRegs);
          uint32_t useMask = Support::bitMask(useId);

          willUse |= useMask;
          willFree |= useMask & liveRegs;
          tiedReg->setUseId(useId);

          if (assignedId != RAAssignment::kPhysNone) {
            uint32_t assignedMask = Support::bitMask(assignedId);

            willFree |= assignedMask;
            liveRegs &= ~assignedMask;

            // OPTIMIZATION: Assign the USE register here if it's possible.
            if (!(liveRegs & useMask)) {
              ASMJIT_PROPAGATE(onMoveReg(group, workId, useId, assignedId));
              tiedReg->markUseDone();
              if (tiedReg->isWrite())
                _curAssignment.makeDirty(group, workId, useId);
              usePending--;
            }
          }
          else {
            // OPTIMIZATION: Assign the USE register here if it's possible.
            if (!(liveRegs & useMask)) {
              ASMJIT_PROPAGATE(onLoadReg(group, workId, useId));
              tiedReg->markUseDone();
              if (tiedReg->isWrite())
                _curAssignment.makeDirty(group, workId, useId);
              usePending--;
            }
          }

          liveRegs |= useMask;
        }
      }
    }

    // Initially all used regs will be marked clobbered.
    uint32_t clobberedByInst = willUse | willOut;

    // ------------------------------------------------------------------------
    // STEP 3:
    //
    // Free all registers that we marked as `willFree`. Only registers that are not
    // USEd by the instruction are considered as we don't want to free regs we need.
    // ------------------------------------------------------------------------

    if (willFree) {
      uint32_t allocableRegs = _availableRegs[group] & ~(_curAssignment.assigned(group) | willFree | willUse | willOut);
      Support::BitWordIterator<uint32_t> it(willFree);

      do {
        uint32_t assignedId = it.next();
        if (_curAssignment.isPhysAssigned(group, assignedId)) {
          uint32_t workId = _curAssignment.physToWorkId(group, assignedId);

          // DECIDE whether to MOVE or SPILL.
          if (allocableRegs) {
            uint32_t reassignedId = decideOnReassignment(group, workId, assignedId, allocableRegs);
            if (reassignedId != RAAssignment::kPhysNone) {
              ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
              allocableRegs ^= Support::bitMask(reassignedId);
              continue;
            }
          }

          ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
        }
      } while (it.hasNext());
    }

    // ------------------------------------------------------------------------
    // STEP 4:
    //
    // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't
    // allocated yet. This is a bit complicated as the allocation is iterative.
    // In some cases we have to wait before allocating a particual physical
    // register as it's still occupied by some other one, which we need to move
    // before we can use it. In this case we skip it and allocate another some
    // other instead (making it free for another iteration).
    //
    // NOTE: Iterations are mostly important for complicated allocations like
    // function calls, where there can be up to N registers used at once. Asm
    // instructions won't run the loop more than once in 99.9% of cases as they
    // use 2..3 registers in average.
    // ------------------------------------------------------------------------

    if (usePending) {
      bool mustSwap = false;
      do {
        uint32_t oldPending = usePending;

        for (i = 0; i < count; i++) {
          RATiedReg* thisTiedReg = &tiedRegs[i];
          if (thisTiedReg->isUseDone()) continue;

          uint32_t thisWorkId = thisTiedReg->workId();
          uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);

          // This would be a bug, fatal one!
          uint32_t targetPhysId = thisTiedReg->useId();
          ASMJIT_ASSERT(targetPhysId != thisPhysId);

          uint32_t targetWorkId = _curAssignment.physToWorkId(group, targetPhysId);
          if (targetWorkId != RAAssignment::kWorkNone) {
            RAWorkReg* targetWorkReg = workRegById(targetWorkId);

            // Swapping two registers can solve two allocation tasks by emitting
            // just a single instruction. However, swap is only available on few
            // architectures and it's definitely not available for each register
            // group. Calling `onSwapReg()` before checking these would be fatal.
            if (_archTraits.hasSwap(group) && thisPhysId != RAAssignment::kPhysNone) {
              ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));

              thisTiedReg->markUseDone();
              if (thisTiedReg->isWrite())
                _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
              usePending--;

              // Double-hit.
              RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
              if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
                targetTiedReg->markUseDone();
                if (targetTiedReg->isWrite())
                  _curAssignment.makeDirty(group, targetWorkId, thisPhysId);
                usePending--;
              }
              continue;
            }

            if (!mustSwap)
              continue;

            // Only branched here if the previous iteration did nothing. This is
            // essentially a SWAP operation without having a dedicated instruction
            // for that purpose (vector registers, etc). The simplest way to
            // handle such case is to SPILL the target register.
            ASMJIT_PROPAGATE(onSpillReg(group, targetWorkId, targetPhysId));
          }

          if (thisPhysId != RAAssignment::kPhysNone) {
            ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));

            thisTiedReg->markUseDone();
            if (thisTiedReg->isWrite())
              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
            usePending--;
          }
          else {
            ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));

            thisTiedReg->markUseDone();
            if (thisTiedReg->isWrite())
              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
            usePending--;
          }
        }

        mustSwap = (oldPending == usePending);
      } while (usePending);
    }

    // ------------------------------------------------------------------------
    // STEP 5:
    //
    // KILL registers marked as KILL/OUT.
    // ------------------------------------------------------------------------

    uint32_t outPending = outTiedCount;
    if (outTiedCount) {
      for (i = 0; i < outTiedCount; i++) {
        RATiedReg* tiedReg = outTiedRegs[i];

        uint32_t workId = tiedReg->workId();
        uint32_t physId = _curAssignment.workToPhysId(group, workId);

        // Must check if it's allocated as KILL can be related to OUT (like KILL
        // immediately after OUT, which could mean the register is not assigned).
        if (physId != RAAssignment::kPhysNone) {
          ASMJIT_PROPAGATE(onKillReg(group, workId, physId));
          willOut &= ~Support::bitMask(physId);
        }

        // We still maintain number of pending registers for OUT assignment.
        // So, if this is only KILL, not OUT, we can safely decrement it.
        outPending -= !tiedReg->isOut();
      }
    }

    // ------------------------------------------------------------------------
    // STEP 6:
    //
    // SPILL registers that will be CLOBBERed. Since OUT and KILL were
    // already processed this is used mostly to handle function CALLs.
    // ------------------------------------------------------------------------

    if (willOut) {
      Support::BitWordIterator<uint32_t> it(willOut);
      do {
        uint32_t physId = it.next();
        uint32_t workId = _curAssignment.physToWorkId(group, physId);

        if (workId == RAAssignment::kWorkNone)
          continue;

        ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
      } while (it.hasNext());
    }

    // ------------------------------------------------------------------------
    // STEP 7:
    //
    // Duplication.
    // ------------------------------------------------------------------------

    for (i = 0; i < dupTiedCount; i++) {
      RATiedReg* tiedReg = dupTiedRegs[i];
      uint32_t workId = tiedReg->workId();
      uint32_t srcId = tiedReg->useId();

      Support::BitWordIterator<uint32_t> it(tiedReg->_allocableRegs);
      while (it.hasNext()) {
        uint32_t dstId = it.next();
        if (dstId == srcId)
          continue;
        _pass->onEmitMove(workId, dstId, srcId);
      }
    }

    // ------------------------------------------------------------------------
    // STEP 8:
    //
    // Assign OUT registers.
    // ------------------------------------------------------------------------

    if (outPending) {
      // Live registers, we need a separate variable (outside of `_curAssignment)
      // to hold these because of KILLed registers. If we KILL a register here it
      // will go out from `_curAssignment`, but we cannot assign to it in here.
      uint32_t liveRegs = _curAssignment.assigned(group);

      // Must avoid as they have been already OUTed (added during the loop).
      uint32_t outRegs = 0;

      // Must avoid as they collide with already allocated ones.
      uint32_t avoidRegs = willUse & ~clobberedByInst;

      for (i = 0; i < outTiedCount; i++) {
        RATiedReg* tiedReg = outTiedRegs[i];
        if (!tiedReg->isOut()) continue;

        uint32_t workId = tiedReg->workId();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

        if (assignedId != RAAssignment::kPhysNone)
          ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));

        uint32_t physId = tiedReg->outId();
        if (physId == RAAssignment::kPhysNone) {
          uint32_t allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs);

          if (!(allocableRegs & ~liveRegs)) {
            // There are no more registers, decide which one to spill.
            uint32_t spillWorkId;
            physId = decideOnSpillFor(group, workId, allocableRegs & liveRegs, &spillWorkId);
            ASMJIT_PROPAGATE(onSpillReg(group, spillWorkId, physId));
          }
          else {
            physId = decideOnAssignment(group, workId, RAAssignment::kPhysNone, allocableRegs & ~liveRegs);
          }
        }

        // OUTs are CLOBBERed thus cannot be ASSIGNed right now.
        ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));

        if (!tiedReg->isKill())
          ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));

        tiedReg->setOutId(physId);
        tiedReg->markOutDone();

        outRegs |= Support::bitMask(physId);
        liveRegs &= ~Support::bitMask(physId);
        outPending--;
      }

      clobberedByInst |= outRegs;
      ASMJIT_ASSERT(outPending == 0);
    }

    _clobberedRegs[group] |= clobberedByInst;
  }

  return kErrorOk;
}