/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/vasm.h"
#include "hphp/runtime/base/stats.h"
#include "hphp/runtime/vm/jit/abi.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/punt.h"
#include "hphp/runtime/vm/jit/reg-algorithms.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-reg.h"
#include "hphp/runtime/vm/jit/vasm-unit.h"
#include "hphp/runtime/vm/jit/vasm-util.h"
#include "hphp/runtime/vm/jit/vasm-visit.h"
#include "hphp/util/arch.h"
#include "hphp/util/assertions.h"
#include "hphp/util/dataflow-worklist.h"
#include "hphp/util/trace.h"
#include <boost/dynamic_bitset.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <folly/Format.h>
#include <algorithm>
#include <random>
// future work
// - #3098509 streamline code, vectors vs linked lists, etc
// - #3098685 Optimize lifetime splitting
// - #3098739 new features now possible with XLS
TRACE_SET_MOD(xls);
namespace HPHP::jit {
///////////////////////////////////////////////////////////////////////////////
namespace {
///////////////////////////////////////////////////////////////////////////////
std::atomic<size_t> s_counter;
DEBUG_ONLY constexpr auto kHintLevel = 5;
/*
* Vreg discriminator.
*/
enum class Constraint { Any, CopySrc, Gpr, Simd, Sf };
Constraint constraint(const Vreg&) { return Constraint::Any; }
Constraint constraint(const Vreg64&) { return Constraint::Gpr; }
Constraint constraint(const Vreg32&) { return Constraint::Gpr; }
Constraint constraint(const Vreg16&) { return Constraint::Gpr; }
Constraint constraint(const Vreg8&) { return Constraint::Gpr; }
Constraint constraint(const VregDbl&) { return Constraint::Simd; }
Constraint constraint(const Vreg128&) { return Constraint::Simd; }
Constraint constraint(const VregSF&) { return Constraint::Sf; }
bool is_wide(const Vreg128&) { return true; }
template<class T> bool is_wide(const T&) { return false; }
/*
* Sentinel constants.
*/
constexpr int kInvalidPhiGroup = -1;
constexpr int kInvalidSpillSlot = -1;
const unsigned kMaxPos = UINT_MAX; // "infinity" use position
/*
* A Use refers to the position where an interval is used or defined.
*/
struct Use {
Use() {}
Use(Constraint kind, unsigned pos, Vreg hint)
: kind(kind)
, pos(pos)
, hint(hint)
{}
/*
* Constraint imposed on the Vreg at this use.
*/
Constraint kind{Constraint::Any};
/*
* Position of this use or def.
*/
unsigned pos{kMaxPos};
/*
* If valid, try to assign the same physical register here as `hint' was
* assigned at `pos'.
*/
Vreg hint;
/*
* Index of phi group metadata.
*/
int phi_group{kInvalidPhiGroup};
};
/*
* A LiveRange is a closed-open range of positions where an interval is live.
*
* Specifically, for the LiveRange [start, end), start is in the range and
* end is not.
*/
struct LiveRange {
bool contains(unsigned pos) const { return pos >= start && pos < end; }
bool intersects(LiveRange r) const { return r.start < end && start < r.end; }
bool contains(LiveRange r) const { return r.start >= start && r.end <= end; }
public:
unsigned start, end;
};
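/*
* For illustration: with the closed-open convention, [4, 8) contains
* positions 4 through 7 but not 8, so [4, 8) and [8, 12) do not intersect,
* while [4, 9) and [8, 12) do.
*/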
struct Variable;
/*
* An Interval represents the lifetime of a Vreg as a sorted list of disjoint
* ranges and a sorted list of use positions. It is the unit of register
* allocation.
*
* Intervals may be split---e.g., because the Vreg needed to be spilled in some
* subrange. All (sub-)Intervals for a given Vreg are connected as a singly
* linked list, sorted by start position.
*
* Every use position must be inside one of the ranges, or exactly at the end
* of the last range. Allowing a use exactly at the end facilitates lifetime
* splitting when the use at the position of an instruction clobbers registers
* as a side effect, e.g. a call.
*
* The intuition for allowing uses at the end of an Interval is that, in truth,
* the picture at a given position looks like this:
*
* | [s]
* |
* +-----|-------------+ copy{s, d} <-+
* | v | |
* + - - - - - - - - - + +--- position n
* | | | |
* +-------------|-----+ <-+
* |
* [d] v
*
* We represent an instruction with a single position `n'. All the use(s) and
* def(s) of that instruction are live at some point within it, but their
* lifetimes nonetheless do not overlap. Since we don't represent instructions
* using two position numbers, we instead allow uses on the open end of an
* Interval, because they don't actually conflict with, e.g., a def of another
* Interval that starts at the same position.
*/
struct Interval {
explicit Interval(Variable* var) : var(var) {}
std::string toString() const;
/*
* Endpoints. Intervals are [closed, open), just like LiveRanges.
*/
unsigned start() const { return ranges.front().start; }
unsigned end() const { return ranges.back().end; }
/*
* Head of this Interval chain.
*/
const Interval* leader() const;
Interval* leader();
/*
* Register allocation state.
*/
bool live() const;
bool constant() const;
bool spilled() const;
bool fixed() const;
/*
* Split this interval at `pos', returning the new `this->next'.
*
* If `keep_uses' is set, uses exactly at the end of the first interval will
* stay with the first split (rather than the second).
*
* @requires: pos > start() && pos < end(); this ensures that both
* subintervals are nonempty.
*/
Interval* split(unsigned pos, bool keep_uses = false);
/////////////////////////////////////////////////////////////////////////////
// Queries.
//
// These operate only on `this' and not its children.
/*
* Get the index of the first range or use that is not strictly lower than
* `pos' (i.e., which contains/is at `pos' or is strictly higher than `pos').
*/
unsigned findRange(unsigned pos) const;
unsigned findUse(unsigned pos) const;
/*
* Whether there is a range that includes `pos', or a use at `pos'.
*/
bool covers(unsigned pos) const;
bool usedAt(unsigned pos) const;
/*
* The position of a use [relative to `pos'] that requires a register (i.e.,
* CopySrc uses are ignored).
*
* firstUseAfter: The first use >= `pos', kMaxPos if there are no more uses.
* lastUseBefore: The last use <= `pos'; 0 if the first use is after `pos'.
* firstUse: The first use in `this'.
*/
unsigned firstUseAfter(unsigned pos) const;
unsigned lastUseBefore(unsigned pos) const;
unsigned firstUse() const;
public:
// The Variable whose (sub-)lifetime this (sub-)interval represents.
Variable* const var;
// Pointer to the next subinterval.
Interval* next{nullptr};
// Live ranges and use positions.
jit::vector<LiveRange> ranges;
jit::vector<Use> uses;
// The physical register assigned to this subinterval.
PhysReg reg;
};
/*
* Metadata about a variable which is the object of register allocation.
*
* Each Variable represents a Vreg in the Vunit we're performing register
* allocation on---including lifetime ranges, uses, def information, etc.
*/
struct Variable {
explicit Variable(Vreg r) : vreg(r) {}
static Variable* create(Vreg r);
static void destroy(Variable* var);
/*
* Accessors.
*/
Interval* ivl() const { return (Interval*)(this + 1); }
bool fixed() const { return vreg.isPhys(); }
/*
* Return the subinterval which covers or has a use at `pos', else nullptr if
* no subinterval does.
*/
Interval* ivlAt(unsigned pos);
Interval* ivlAtUse(unsigned pos);
public:
// The Vreg this value represents.
const Vreg vreg;
// Whether this is a SIMD value.
bool wide{false};
// Whether this variable is a constant, and its constant value.
bool constant{false};
Vconst val;
// The spill slot assigned to this value. Since we don't spill physical
// registers, and Vregs are SSA, a value's assigned slot never changes.
int slot{kInvalidSpillSlot};
// Position of, and block containing, the variable's single def instruction;
// invalid for physical registers.
unsigned def_pos{kMaxPos};
Vlabel def_block;
// List of recordbasenativesps this vreg is live across.
jit::vector<unsigned> recordbasenativesps{};
// Copy of the Vreg's def instruction.
Vinstr def_inst;
};
using LiveSet = boost::dynamic_bitset<>; // Bitset of Vreg numbers.
template<class Fn> void forEach(const LiveSet& bits, Fn fn) {
for (auto i = bits.find_first(); i != bits.npos; i = bits.find_next(i)) {
fn(Vreg(i));
}
}
/*
* Sack of inputs and pre-computed data used by the main XLS algorithm.
*/
struct VxlsContext {
explicit VxlsContext(const Abi& abi)
: abi(abi)
, sp(rsp())
{
switch (arch()) {
case Arch::X64:
tmp = reg::xmm15; // reserve xmm15 to break shuffle cycles
break;
case Arch::ARM:
tmp = vixl::d31;
break;
}
this->abi.simdUnreserved -= tmp;
this->abi.simdReserved |= tmp;
assertx(!abi.gpUnreserved.contains(sp));
assertx(!abi.gpUnreserved.contains(tmp));
}
public:
Abi abi;
// Arch-dependent stack pointer.
PhysReg sp;
// Temp register used only for breaking cycles.
PhysReg tmp;
// Debug-only run identifier.
size_t counter;
// Sorted blocks.
jit::vector<Vlabel> blocks;
// [start,end) position of each block.
jit::vector<LiveRange> block_ranges;
// Per-block offset from sp to the spill area. std::nullopt in blocks where
// we have not yet reached the recordbasenativesp instruction.
jit::vector<Optional<int>> spill_offsets;
// Per-block live-in sets.
jit::vector<LiveSet> livein;
};
///////////////////////////////////////////////////////////////////////////////
// Interval.
const Interval* Interval::leader() const { return var->ivl(); }
Interval* Interval::leader() { return var->ivl(); }
bool Interval::live() const { return reg != InvalidReg; }
bool Interval::constant() const { return var->constant; }
bool Interval::spilled() const { return !live() && var->slot >= 0; }
bool Interval::fixed() const { return var->fixed(); }
unsigned Interval::findRange(unsigned pos) const {
unsigned lo = 0;
for (unsigned hi = ranges.size(); lo < hi;) {
auto mid = (lo + hi) / 2;
auto r = ranges[mid];
if (pos < r.start) {
hi = mid;
} else if (r.end <= pos) {
lo = mid + 1;
} else {
return mid;
}
}
assertx(lo == ranges.size() || pos < ranges[lo].start);
return lo;
}
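/*
* For illustration: given ranges [4,8) and [12,16), findRange(6) == 0,
* findRange(9) == 1 (the first range not strictly below 9), and
* findRange(20) == 2, i.e. ranges.size().
*/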
unsigned Interval::findUse(unsigned pos) const {
unsigned lo = 0, hi = uses.size();
while (lo < hi) {
auto mid = (lo + hi) / 2;
auto u = uses[mid].pos;
if (pos < u) {
hi = mid;
} else if (u < pos) {
lo = mid + 1;
} else {
return mid;
}
}
assertx(lo == uses.size() || pos < uses[lo].pos);
return lo;
}
bool Interval::covers(unsigned pos) const {
if (pos < start() || pos >= end()) return false;
auto i = ranges.begin() + findRange(pos);
return i != ranges.end() && i->contains(pos);
}
bool Interval::usedAt(unsigned pos) const {
if (pos < start() || pos > end()) return false;
auto i = uses.begin() + findUse(pos);
return i != uses.end() && pos == i->pos;
}
unsigned Interval::firstUseAfter(unsigned pos) const {
for (auto& u : uses) {
if (u.kind == Constraint::CopySrc) continue;
if (u.pos >= pos) return u.pos;
}
return kMaxPos;
}
unsigned Interval::lastUseBefore(unsigned pos) const {
auto prev = 0;
for (auto& u : uses) {
if (u.kind == Constraint::CopySrc) continue;
if (u.pos > pos) return prev;
prev = u.pos;
}
return prev;
}
unsigned Interval::firstUse() const {
for (auto& u : uses) {
if (u.kind != Constraint::CopySrc) return u.pos;
}
return kMaxPos;
}
Interval* Interval::split(unsigned pos, bool keep_uses) {
assertx(pos > start() && pos < end()); // both parts will be non-empty
auto child = jit::make<Interval>(var);
child->next = next;
next = child;
// advance r1 to the first range we want in child; maybe split a range.
auto r1 = ranges.begin() + findRange(pos);
if (pos > r1->start) { // split r at pos
child->ranges.push_back({pos, r1->end});
r1->end = pos;
r1++;
}
child->ranges.insert(child->ranges.end(), r1, ranges.end());
ranges.erase(r1, ranges.end());
// advance u1 to the first use position in child, then copy u1..end to child.
auto u1 = uses.begin() + findUse(end());
auto u2 = uses.end();
if (keep_uses) {
while (u1 != u2 && u1->pos <= end()) u1++;
} else {
while (u1 != u2 && u1->pos < child->start()) u1++;
}
child->uses.insert(child->uses.end(), u1, u2);
uses.erase(u1, u2);
return child;
}
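/*
* For illustration: splitting an interval with ranges [10,20),[30,40) at
* pos 32 leaves the parent with [10,20),[30,32) and gives the child [32,40).
* A use exactly at pos 32 stays with the parent when `keep_uses' is set
* (e.g. when the instruction at 32 reads the value but also clobbers
* registers), and moves to the child otherwise.
*/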
///////////////////////////////////////////////////////////////////////////////
// Variable.
Variable* Variable::create(Vreg r) {
auto const mem = malloc(sizeof(Variable) + sizeof(Interval));
auto const var = new (mem) Variable(r);
new (reinterpret_cast<void*>(var + 1)) Interval(var);
return var;
}
void Variable::destroy(Variable* var) {
var->~Variable();
auto ivl = reinterpret_cast<Interval*>(var + 1);
ivl->~Interval();
free(var);
}
Interval* Variable::ivlAt(unsigned pos) {
for (auto ivl = this->ivl(); ivl; ivl = ivl->next) {
if (pos < ivl->start()) return nullptr;
if (ivl->covers(pos)) return ivl;
}
return nullptr;
}
Interval* Variable::ivlAtUse(unsigned pos) {
for (auto ivl = this->ivl(); ivl; ivl = ivl->next) {
if (pos < ivl->start()) return nullptr;
if (ivl->usedAt(pos)) return ivl;
}
return nullptr;
}
///////////////////////////////////////////////////////////////////////////////
/*
* Extended Linear Scan is based on Wimmer & Franz "Linear Scan Register
* Allocation on SSA Form". As currently written, it also works on non-SSA
* input.
*
* 1. Sort blocks such that all predecessors of B come before B, except
* loop-edge predecessors. If the input IR is in SSA form, this also
* implies the definition of each SSATmp comes before all uses.
*
* 2. Assign an even numbered position to every instruction. Positions
* between instructions are used to insert copies and spills. Each block
* starts with an extra position number that corresponds to an imaginary
* "label" instruction that is not physically in the vasm IR.
*
* 3. Create one interval I for each Vreg R that requires register allocation,
* by iterating blocks and instructions in reverse order, computing live
* registers as we go. Each interval consists of a sorted list of disjoint,
* live ranges covering the positions where R must be in a physical register
* or spill slot. Vregs that are constants or have forced registers
* (e.g. VmSp) are skipped. If the input is SSA, the start position of each
* interval dominates every live range and use position in the interval.
*
* 4. Process intervals in order of start position, maintaining the set of
* active (live) and inactive (not live, but with live ranges that start
* after the current interval). When choosing a register, prefer the one
* available furthest into the future. If necessary, split the current
* interval so the first part gets a register, and enqueue the rest.
* When no registers are available, choose either the current interval or
* another one to spill, trying to free up the longest-available register.
*
* Split positions must be after an interval's start position, and on or before
* the chosen split point. We're free to choose a good position in between,
* for example at block boundaries or in cold blocks.
*
* 5. Once intervals have been walked and split, every interval has an assigned
* operand (register or spill location) for all positions where it's alive.
* Visit every instruction and modify its Vreg operands to the physical
* register that was assigned.
*
* 6. Splitting creates sub-intervals that are assigned to different registers
* or spill locations, so insert resolving copies at the split positions
* between intervals that were split in a block, and copies on control-flow
* edges connecting different sub-intervals. When more than one copy occurs
* in a position, they are parallel-copies (all sources read before any dest
* is written).
*
* If any sub-interval was spilled, a single store is generated after each
* definition point.
*
* When analyzing instructions that use or define a virtual SF register
* (VregSF), eagerly rename it to the singleton PhysReg RegSF{0}, under the
* assumption that there can only be one live SF at each position. This
* reduces the number of intervals we need to process, facilitates inserting
* ldimm{0} (as xor), and is checked by checkSF().
*/
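/*
* For illustration of the position numbering (see computePositions() below):
* instructions get even ids two apart, so a block whose first instruction is
* at position 20 and which holds three instructions spans [20,26) with
* instruction ids 20, 22, 24; the odd positions in between are where copies
* and spills get inserted.
*/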
///////////////////////////////////////////////////////////////////////////////
/*
* Printing utilities.
*/
void printVariables(const char* caption,
const Vunit& unit, const VxlsContext& ctx,
const jit::vector<Variable*>& variables);
void dumpVariables(const jit::vector<Variable*>& variables,
unsigned num_spills);
/*
* The ID of the block enclosing `pos'.
*/
Vlabel blockFor(const VxlsContext& ctx, unsigned pos) {
for (unsigned lo = 0, hi = ctx.blocks.size(); lo < hi;) {
auto mid = (lo + hi) / 2;
auto r = ctx.block_ranges[ctx.blocks[mid]];
if (pos < r.start) {
hi = mid;
} else if (pos >= r.end) {
lo = mid + 1;
} else {
return ctx.blocks[mid];
}
}
always_assert(false);
return Vlabel{0xffffffff};
}
/*
* Insert `src' into `dst' before dst[j], corresponding to XLS logical
* position `pos'.
*
* Updates `j' to refer to the same instruction after the code insertions.
*/
void insertCodeAt(jit::vector<Vinstr>& dst, unsigned& j,
const jit::vector<Vinstr>& src, unsigned pos) {
auto const irctx = dst[j].irctx();
dst.insert(dst.begin() + j, src.size(), trap{TRAP_REASON});
for (auto const& inst : src) {
dst[j] = inst;
dst[j].set_irctx(irctx);
dst[j++].id = pos;
}
}
///////////////////////////////////////////////////////////////////////////////
// Pre-analysis passes.
/*
* Compute the linear position range of each block.
*
* This modifies the Vinstrs in `unit' by setting their `pos' members, in
* addition to producing the block-to-range map.
*/
jit::vector<LiveRange> computePositions(Vunit& unit,
const jit::vector<Vlabel>& blocks) {
auto block_ranges = jit::vector<LiveRange>{unit.blocks.size()};
unsigned pos = 0;
for (auto const b : blocks) {
auto& code = unit.blocks[b].code;
bool front_uses{false};
visitUses(unit, code.front(), [&](Vreg /*r*/) { front_uses = true; });
if (front_uses) {
auto irctx = code.front().irctx();
code.emplace(code.begin(), nop{}, irctx);
}
auto const start = pos;
for (auto& inst : unit.blocks[b].code) {
inst.id = pos;
pos += 2;
}
block_ranges[b] = { start, pos };
}
return block_ranges;
}
/*
* Return the effect this instruction has on the value of `sp'.
*
* Asserts (if `do_assert' is set) if an instruction mutates `sp' in an
* untrackable way.
*/
int spEffect(const Vunit& unit, const Vinstr& inst, PhysReg sp,
bool do_assert = debug) {
switch (inst.op) {
case Vinstr::push:
case Vinstr::pushf:
case Vinstr::pushm:
return -8;
case Vinstr::pushp:
case Vinstr::pushpm:
return -16;
case Vinstr::pop:
case Vinstr::popf:
case Vinstr::popm:
return 8;
case Vinstr::popp:
case Vinstr::poppm:
return 16;
case Vinstr::lea: {
auto& i = inst.lea_;
if (i.d == Vreg64(sp)) {
assertx(i.s.base == i.d && !i.s.index.isValid());
return i.s.disp;
}
return 0;
}
default:
if (do_assert) {
visitDefs(unit, inst, [&] (Vreg r) { assertx(r != sp); });
}
return 0;
}
}
/*
* Compute the offset from `sp' to the spill area at each block start.
*/
jit::vector<Optional<int>> analyzeSP(const Vunit& unit,
const jit::vector<Vlabel>& blocks,
PhysReg sp) {
auto visited = boost::dynamic_bitset<>(unit.blocks.size());
auto spill_offsets = jit::vector<Optional<int>>(unit.blocks.size());
for (auto const b : blocks) {
auto offset = visited.test(b) ? spill_offsets[b] : std::nullopt;
for (unsigned j = 0; j < unit.blocks[b].code.size(); j++) {
auto const& inst = unit.blocks[b].code[j];
if (inst.op == Vinstr::recordbasenativesp) {
assert_flog(!offset, "Block B{} Instr {} initializes native SP, but "
"already initialized.", size_t(b), j);
offset = 0;
} else if (inst.op == Vinstr::unrecordbasenativesp) {
assert_flog(offset, "Block B{} Instr {} uninitializes native SP, but "
"already uninitialized.", size_t(b), j);
assert_flog(*offset == 0, "Block B{} Instr {} uninitializes native SP, "
"but SPOffset is nonzero.", size_t(b), j);
offset = std::nullopt;
} else if (offset) {
*offset -= spEffect(unit, inst, sp);
}
}
for (auto const s : succs(unit.blocks[b])) {
if (visited.test(s)) {
assert_flog(offset == spill_offsets[s],
"sp mismatch on edge B{}->B{}, expected {} got {}",
size_t(b), size_t(s),
spill_offsets[s] ? std::to_string(*spill_offsets[s])
: "none",
offset ? std::to_string(*offset)
: "none");
} else {
spill_offsets[s] = offset;
visited.set(s);
}
}
}
return spill_offsets;
}
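/*
* For illustration: after recordbasenativesp the offset starts at 0; a push
* (spEffect -8) raises it to 8, meaning the spill area now sits 8 bytes above
* the moved sp, and a matching pop brings it back to 0. The assert on each
* out-edge checks that all predecessors agree on a successor's offset.
*/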
jit::vector<LiveSet> computeLiveness(const Vunit& unit,
const Abi& abi,
const jit::vector<Vlabel>& blocks) {
auto livein = jit::vector<LiveSet>{unit.blocks.size()};
auto const preds = computePreds(unit);
auto blockPO = jit::vector<uint32_t>(unit.blocks.size());
auto revBlocks = blocks;
std::reverse(begin(revBlocks), end(revBlocks));
auto wl = dataflow_worklist<uint32_t>(revBlocks.size());
for (unsigned po = 0; po < revBlocks.size(); po++) {
wl.push(po);
blockPO[revBlocks[po]] = po;
}
while (!wl.empty()) {
auto b = revBlocks[wl.pop()];
auto& block = unit.blocks[b];
// start with the union of the successor blocks
LiveSet live(unit.next_vr);
for (auto s : succs(block)) {
if (!livein[s].empty()) live |= livein[s];
}
// and now go through the instructions in the block in reverse order
for (auto const& inst : boost::adaptors::reverse(block.code)) {
RegSet implicit_uses, implicit_across, implicit_defs;
getEffects(abi, inst, implicit_uses, implicit_across, implicit_defs);
auto const vsf = Vreg{RegSF{0}};
auto const dvisit = [&] (Vreg r, Width w) {
live.reset(w == Width::Flags ? vsf : r);
};
auto const uvisit = [&] (Vreg r, Width w) {
live.set(w == Width::Flags ? vsf : r);
};
visitDefs(unit, inst, dvisit);
visit(unit, implicit_defs, dvisit);
visitUses(unit, inst, uvisit);
visit(unit, implicit_uses, uvisit);
visit(unit, implicit_across, uvisit);
}
if (live != livein[b]) {
livein[b] = live;
for (auto p : preds[b]) {
wl.push(blockPO[p]);
}
}
}
return livein;
}
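/*
* For illustration, this is a standard backward liveness fixpoint:
* live-in(b) = uses(b) | (union of successor live-ins - defs(b)), computed
* per instruction in reverse (defs cleared first, then uses, implicit uses,
* and across-operands set), with blocks revisited via the worklist until no
* live-in set changes.
*/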
///////////////////////////////////////////////////////////////////////////////
// Lifetime intervals.
/*
* Add `r' to `ivl'.
*
* This assumes that the ranges of `ivl' are in reverse order, and that `r'
* precedes or overlaps with ivl->ranges.first().
*/
void addRange(Interval* ivl, LiveRange r) {
while (!ivl->ranges.empty() && r.contains(ivl->ranges.back())) {
ivl->ranges.pop_back();
}
if (ivl->ranges.empty()) {
return ivl->ranges.push_back(r);
}
auto& first = ivl->ranges.back();
if (first.contains(r)) return;
if (r.end >= first.start) {
first.start = r.start;
} else {
ivl->ranges.push_back(r);
}
}
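/*
* For illustration: intervals are built bottom-up, so ranges.back() is the
* earliest range seen so far. Adding [5,10) when back() is [8,20) extends it
* to [5,20); adding [2,4) after that pushes a separate range, since the gap
* between 4 and 5 keeps them disjoint.
*/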
/*
* Visits defs of an instruction, updates their liveness, adds live ranges, and
* adds Uses with appropriate hints.
*/
struct DefVisitor {
DefVisitor(const Vunit& unit, jit::vector<Variable*>& variables,
LiveSet& live, Vlabel b, const Vinstr& inst, unsigned pos)
: m_unit(unit)
, m_variables(variables)
, m_live(live)
, m_block(b)
, m_inst(inst)
, m_pos(pos)
{}
// Skip immediates and uses.
template<class F> void imm(const F&) {}
template<class R> void use(R) {}
template<class S, class H> void useHint(S, H) {}
template<class R> void across(R) {}
void def(Vtuple defs) {
for (auto r : m_unit.tuples[defs]) def(r);
}
void defHint(Vtuple def_tuple, Vtuple hint_tuple) {
auto& defs = m_unit.tuples[def_tuple];
auto& hints = m_unit.tuples[hint_tuple];
for (int i = 0; i < defs.size(); i++) {
def(defs[i], Constraint::Any, hints[i]);
}
}
template<class R> void def(R r) {
def(r, constraint(r), Vreg{}, is_wide(r));
}
template<class D, class H> void defHint(D dst, H hint) {
def(dst, constraint(dst), hint, is_wide(dst));
}
void def(Vreg r) { def(r, Constraint::Any); }
void defHint(Vreg d, Vreg hint) { def(d, Constraint::Any, hint); }
void def(RegSet rs) { rs.forEach([&](Vreg r) { def(r); }); }
void def(VregSF r) {
r = RegSF{0}; // eagerly rename all SFs
def(r, constraint(r));
}
private:
void def(Vreg r, Constraint kind, Vreg hint = Vreg{}, bool wide = false) {
auto var = m_variables[r];
if (m_live.test(r)) {
m_live.reset(r);
var->ivl()->ranges.back().start = m_pos;
} else {
if (!var) {
var = m_variables[r] = Variable::create(r);
}
addRange(var->ivl(), {m_pos, m_pos + 1});
}
if (!var->fixed()) {
var->ivl()->uses.push_back(Use{kind, m_pos, hint});
var->wide |= wide;
var->def_pos = m_pos;
var->def_block = m_block;
var->def_inst = m_inst;
}
}
private:
const Vunit& m_unit;
jit::vector<Variable*>& m_variables;
LiveSet& m_live;
Vlabel m_block;
const Vinstr& m_inst;
unsigned m_pos;
};
struct UseVisitor {
UseVisitor(const Vunit& unit, jit::vector<Variable*>& variables,
LiveSet& live, const Vinstr& inst, LiveRange range)
: m_unit(unit)
, m_variables(variables)
, m_live(live)
, m_inst(inst)
, m_range(range)
{}
// Skip immediates and defs.
template<class F> void imm(const F&) {}
template<class R> void def(R) {}
template<class D, class H> void defHint(D, H) {}
template<class R> void use(R r) { use(r, constraint(r), m_range.end); }
template<class S, class H> void useHint(S src, H hint) {
use(src, constraint(src), m_range.end, hint);
}
void use(VregSF r) {
r = RegSF{0}; // eagerly rename all SFs
use(r, constraint(r), m_range.end);
}
void use(RegSet regs) { regs.forEach([&](Vreg r) { use(r); }); }
void use(Vtuple uses) { for (auto r : m_unit.tuples[uses]) use(r); }
void useHint(Vtuple src_tuple, Vtuple hint_tuple) {
auto& uses = m_unit.tuples[src_tuple];
auto& hints = m_unit.tuples[hint_tuple];
for (int i = 0, n = uses.size(); i < n; i++) {
useHint(uses[i], hints[i]);
}
}
void use(Vptr m) {
if (m.base.isValid()) use(m.base);
if (m.index.isValid()) use(m.index);
}
template<Width w> void use(Vp<w> m) { use(static_cast<Vptr>(m)); }
void use(VcallArgsId /*id*/) {
always_assert(false && "vcall unsupported in vxls");
}
/*
* An operand marked as UA means use-across. Mark it live across the
* instruction so its lifetime conflicts with the destination, which ensures
* it will be assigned a different register than the destination. This isn't
* necessary if *both* operands of a binary instruction are the same virtual
* register, but is still correct.
*/
template<class R> void across(R r) { use(r, constraint(r), m_range.end + 1); }
void across(Vtuple uses) { for (auto r : m_unit.tuples[uses]) across(r); }
void across(RegSet regs) { regs.forEach([&](Vreg r) { across(r); }); }
void across(Vptr m) {
if (m.base.isValid()) across(m.base);
if (m.index.isValid()) across(m.index);
}
template<Width w> void across(Vp<w> m) { across(static_cast<Vptr>(m)); }
private:
void use(Vreg r, Constraint kind, unsigned end, Vreg hint = Vreg{}) {
m_live.set(r);
auto var = m_variables[r];
if (!var) var = m_variables[r] = Variable::create(r);
addRange(var->ivl(), {m_range.start, end});
if (!var->fixed()) {
if (m_inst.op == Vinstr::copyargs ||
m_inst.op == Vinstr::copy2 ||
m_inst.op == Vinstr::copy ||
m_inst.op == Vinstr::phijmp) {
// all these instructions lower to parallel copyplans, which know
// how to load directly from constants or spilled locations
kind = Constraint::CopySrc;
}
var->ivl()->uses.push_back({kind, m_range.end, hint});
}
}
private:
const Vunit& m_unit;
jit::vector<Variable*>& m_variables;
LiveSet& m_live;
const Vinstr& m_inst;
const LiveRange m_range;
};
/*
* Compute lifetime intervals and use positions of all Vregs by walking the
* code bottom-up once.
*/
jit::vector<Variable*> buildIntervals(const Vunit& unit,
const VxlsContext& ctx) {
ONTRACE(kVasmRegAllocDetailLevel, printCfg(unit, ctx.blocks));
auto variables = jit::vector<Variable*>{unit.next_vr};
for (auto b : boost::adaptors::reverse(ctx.blocks)) {
auto& block = unit.blocks[b];
// initial live set is the union of successor live sets.
LiveSet live(unit.next_vr);
for (auto s : succs(block)) {
always_assert(!ctx.livein[s].empty());
live |= ctx.livein[s];
}
// add a range covering the whole block to every live interval
auto& block_range = ctx.block_ranges[b];
forEach(live, [&](Vreg r) {
if (!variables[r]) variables[r] = Variable::create(r);
addRange(variables[r]->ivl(), block_range);
});
// visit instructions bottom-up, adding uses & ranges
auto pos = block_range.end;
for (auto const& inst : boost::adaptors::reverse(block.code)) {
pos -= 2;
RegSet implicit_uses, implicit_across, implicit_defs;
getEffects(ctx.abi, inst, implicit_uses, implicit_across, implicit_defs);
DefVisitor dv(unit, variables, live, b, inst, pos);
visitOperands(inst, dv);
dv.def(implicit_defs);
UseVisitor uv(unit, variables, live, inst, {block_range.start, pos});
visitOperands(inst, uv);
uv.use(implicit_uses);
uv.across(implicit_across);
if (inst.op == Vinstr::recordbasenativesp) {
forEach(live, [&](Vreg r) {
if (!unit.regToConst.count(r)) {
// We mark the instruction as a use so no spills span the
// instruction unless they have to.
uv.use(r);
if (!variables[r]) variables[r] = Variable::create(r);
variables[r]->recordbasenativesps.push_back(pos);
}
});
} else if (inst.op == Vinstr::unrecordbasenativesp) {
forEach(live, [&](Vreg r) {
if (!unit.regToConst.count(r)) {
// We mark the instruction as a use so no spills span the
// instruction unless they have to.
uv.use(r);
}
});
}
}
// sanity check liveness computation
always_assert(live == ctx.livein[b]);
}
// finish processing live ranges for constants
for (auto& c : unit.constToReg) {
if (auto var = variables[c.second]) {
var->ivl()->ranges.back().start = 0;
var->def_pos = 0;
var->constant = true;
var->val = c.first;
}
}
// Ranges and uses were generated in reverse order. Unreverse them now.
for (auto var : variables) {
if (!var) continue;
auto ivl = var->ivl();
assertx(!ivl->ranges.empty()); // no empty intervals
std::reverse(ivl->uses.begin(), ivl->uses.end());
std::reverse(ivl->ranges.begin(), ivl->ranges.end());
}
ONTRACE(kVasmRegAllocDetailLevel,
printVariables("after building intervals", unit, ctx, variables);
);
if (debug) {
// only constants and physical registers can be live-into the entry block.
forEach(ctx.livein[unit.entry], [&](Vreg r) {
UNUSED auto var = variables[r];
assertx(var->constant || var->fixed());
});
for (auto var : variables) {
if (!var) continue;
auto const ivl = var->ivl();
for (unsigned i = 1; i < ivl->uses.size(); i++) {
assertx(ivl->uses[i].pos >= ivl->uses[i-1].pos); // monotonic
}
for (unsigned i = 1; i < ivl->ranges.size(); i++) {
assertx(ivl->ranges[i].end > ivl->ranges[i].start); // no empty ranges
assertx(ivl->ranges[i].start > ivl->ranges[i-1].end); // no empty gaps
}
}
}
return variables;
}
///////////////////////////////////////////////////////////////////////////////
/*
* A map from PhysReg number to position.
*/
using PosVec = PhysReg::Map<unsigned>;
/*
* Between `r1' and `r2', choose the one whose position in `pos_vec' is closest
* to but still after (or at) `pos'.
*
* If neither has a position after `pos', choose the higher one. If both have
* the same position, choose `r1'.
*/
PhysReg choose_closest_after(unsigned pos, const PosVec& pos_vec,
PhysReg r1, PhysReg r2) {
if (r1 == InvalidReg) return r2;
if (r2 == InvalidReg) return r1;
if (pos_vec[r1] >= pos) {
if (pos <= pos_vec[r2] && pos_vec[r2] < pos_vec[r1]) {
return r2;
}
} else {
if (pos_vec[r2] > pos_vec[r1]) {
return r2;
}
}
return r1;
}
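/*
* For illustration: with pos = 10, pos_vec[r1] = 14 and pos_vec[r2] = 11,
* r2 wins (closest at-or-after 10); if both positions were below 10, say 6
* and 9, the higher one (9) wins as the fallback.
*/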
/*
* Like choose_closest_after(), but iterates through `pos_vec'.
*/
PhysReg find_closest_after(unsigned pos, const PosVec& pos_vec) {
auto ret = InvalidReg;
for (auto const r : pos_vec) {
ret = choose_closest_after(pos, pos_vec, ret, r);
}
return ret;
}
///////////////////////////////////////////////////////////////////////////////
// Hints.
/*
* A member of a "phi group"---i.e., the set of all variables which flow into
* one another via a set of corresponding phijmp's and phidef's.
*/
struct PhiVar {
Vreg r; // the Vreg that is used or def'd in the phi
unsigned pos; // position of the phijmp or phidef
};
/*
* Hint metadata that doesn't fit into the `variables' vector.
*/
struct HintInfo {
jit::vector<jit::vector<PhiVar>> phi_groups;
};
/*
* Add the phi use of `r' at `pos' to the phi group given by `pgid'.
*/
void addPhiGroupMember(const jit::vector<Variable*>& variables,
jit::vector<jit::vector<PhiVar>>& phi_groups,
Vreg r, unsigned pos, int pgid,
Vreg hint = Vreg{}) {
assertx(phi_groups.size() > pgid);
assertx(!phi_groups[pgid].empty());
auto const var = variables[r];
assertx(var != nullptr);
auto const ivl = var->ivl();
auto& u = pos == var->def_pos
? ivl->uses.front()
: ivl->uses[ivl->findUse(pos)];
assertx(u.pos == pos);
if (hint.isValid()) u.hint = hint;
u.phi_group = pgid;
phi_groups[pgid].push_back({r, u.pos});
}
/*
* Collect hint metadata for phis.
*
* The return value of this pass represents an equivalence relation over phi
* uses and defs. The relation is defined as follows:
*
* Consider a phidef F with arity n (i.e., it defines n variables). F gives
* rise to n equivalence classes, each of which consists of the i-th use or def
* variable from each phi instruction in the transitive closure of the predicate
*
* P(f) := { every phijmp which targets f, and every other phidef
* targeted by those phijmp's }
*
* applied to F.
*
* Or, taking a pictoral example:
* _________________________________________________________
* | |
* | +---------+ |
* | | | |
* | v | |
* | phijmp(x1, y1) phijmp(x2, y2) phijmp(x3, y3) . |
* | | | | . |
* | +-----------------+ +-------------+ . |
* | | | | |
* | v v | |
* | phidef(x4, y4) phidef(x5, y5) -------------------+ |
* | | |
* |______ . ________________________________________________|
* .
* .
* ______ | __________________________
* | v |
* | phijmp(x6, y6)--->phidef(x7, y7) |
* |___________________________________|
*
* The equivalence classes in this example are {x1, x2, x4}, {y1, y2, y4},
* {x3, x5}, {y3, y5}, {x6, x7}, and {y6, y7}.
*
* We use these equivalence classes during register allocation to try to assign
* the same register to all the variables in a phi group (at least at those
* positions where the phi instructions occur).
*
* Note that this equivalence relation does /not/ capture the idea that we
* probably want the same register for x4 as we do for x6 and x7. To track that
* information, we set the `hint' on phi uses to the corresponding phidef
* variable, then let the hint back propagation pass handle it. (Note that we
* do /not/ set the `hint' field at phi defs, nor do we try to use it at any
* point.)
*/
jit::vector<jit::vector<PhiVar>>
analyzePhiHints(const Vunit& unit, const VxlsContext& ctx,
const jit::vector<Variable*>& variables) {
auto phi_groups = jit::vector<jit::vector<PhiVar>>{};
/*
* Get the phi group for r's def, optionally allocating a new phi group if
* necessary.
*/
auto const def_phi_group = [&] (Vreg r, bool alloc) {
auto const var = variables[r];
assertx(var != nullptr);
assertx(var->def_inst.op == Vinstr::phidef);
auto& u = var->ivl()->uses.front();
if (u.phi_group == kInvalidPhiGroup && alloc) {
u.phi_group = phi_groups.size();
phi_groups.emplace_back(jit::vector<PhiVar>{{r, u.pos}});
}
return u.phi_group;
};
auto const is_fixed = [&] (Vreg r) { return variables[r]->fixed(); };
for (auto const b : ctx.blocks) {
auto const& code = unit.blocks[b].code;
if (code.empty()) continue;
auto const& first = code.front();
auto const& last = code.back();
if (first.op == Vinstr::phidef) {
// A phidef'd variable just needs to be assigned a phi group if it doesn't
// already have one (i.e., from handling a phijmp).
auto const& defs = unit.tuples[first.phidef_.defs];
for (auto const r : defs) {
if (is_fixed(r)) continue;
def_phi_group(r, true);
}
}
if (last.op == Vinstr::phijmp) {
auto const target = last.phijmp_.target;
auto const& def_inst = unit.blocks[target].code.front();
assertx(def_inst.op == Vinstr::phidef);
auto const& uses = unit.tuples[last.phijmp_.uses];
auto const& defs = unit.tuples[def_inst.phidef_.defs];
assertx(uses.size() == defs.size());
// Set the phi group for each phi use variable to that of the
// corresponding phi def variable (allocating a new group if the def
// variable has not already been assigned one).
for (size_t i = 0, n = uses.size(); i < n; ++i) {
if (is_fixed(defs[i])) continue;
auto const pgid = def_phi_group(defs[i], true);
addPhiGroupMember(variables, phi_groups,
uses[i], last.id, pgid, defs[i]);
}
}
}
return phi_groups;
}
HintInfo analyzeHints(const Vunit& unit, const VxlsContext& ctx,
const jit::vector<Variable*>& variables) {
HintInfo hint_info;
hint_info.phi_groups = analyzePhiHints(unit, ctx, variables);
return hint_info;
}
/*
* Try to return the physical register that would coalesce `ivl' with its
* hinted source.
*/
PhysReg tryCoalesce(const jit::vector<Variable*>& variables,
const Interval* ivl) {
assertx(ivl == ivl->leader());
assertx(!ivl->uses.empty());
auto const& def = ivl->uses.front();
assertx(def.pos == ivl->var->def_pos);
if (!def.hint.isValid()) return InvalidReg;
auto const hint_var = variables[def.hint];
for (auto hvl = hint_var->ivl(); hvl; hvl = hvl->next) {
if (def.pos == hvl->end()) return hvl->reg;
}
return InvalidReg;
}
/*
* Scan the uses of `ivl' for phi nodes, and return a pair of hints for the
* first one we find.
*
* For a given phi node F, the first of the two hints we return is derived from
* only those phijmps targeting F (if F is a phidef), or from the phidefs F
* targets (if F is a phijmp). The second hint accounts for F's entire
* equivalence class---see analyzePhiHints() for more details.
*/
Optional<std::pair<PhysReg,PhysReg>>
tryPhiHint(const jit::vector<Variable*>& variables,
const Interval* ivl, const HintInfo& hint_info,
const PosVec& free_until) {
auto const choose = [&] (PhysReg h1, PhysReg h2) {
return choose_closest_after(ivl->end(), free_until, h1, h2);
};
for (auto const& u : ivl->uses) {
// Look for the first phi hint.
if (u.phi_group == kInvalidPhiGroup) continue;
auto preferred = InvalidReg;
auto group_hint = InvalidReg;
// Choose a register to be the hint.
for (auto const& phiv : hint_info.phi_groups[u.phi_group]) {
auto const var = variables[phiv.r];
assertx(var);
auto const phivl = var->ivlAtUse(phiv.pos);
if (phivl->reg == InvalidReg) continue;
auto const& phu = phivl->uses[phivl->findUse(phiv.pos)];
assertx(phu.pos == phiv.pos);
// Prefer to use a hint from a corresponding phi node.
if (u.hint == phiv.r || phu.hint == ivl->var->vreg) {
preferred = choose(preferred, phivl->reg);
}
// In the end, though, we're okay with any hint from the group.
group_hint = choose(group_hint, phivl->reg);
}
return std::make_pair(preferred, group_hint);
}
return std::nullopt;
}
/*
* Choose an appropriate hint for the (sub-)interval `ivl'.
*
* The register allocator passes us constraints via `free_until' and `allow'.
* We use the `free_until' vector to choose a hint that most tightly covers the
* lifetime of `ivl'; we use `allow' to ensure that our hint satisfies
* constraints.
*/
PhysReg chooseHint(const jit::vector<Variable*>& variables,
const Interval* ivl, const HintInfo& hint_info,
const PosVec& free_until, RegSet allow) {
if (!RuntimeOption::EvalHHIREnablePreColoring &&
!RuntimeOption::EvalHHIREnableCoalescing) return InvalidReg;
auto const choose = [&] (PhysReg h1, PhysReg h2) {
return choose_closest_after(ivl->end(), free_until, h1, h2);
};
auto hint = InvalidReg;
// If `ivl' contains its def, try a coalescing hint.
if (ivl == ivl->leader()) {
hint = tryCoalesce(variables, ivl);
}
// Return true if we should return `h'.
auto const check_and_trace = [&] (PhysReg h, const char* prefix) {
if (h == InvalidReg) return false;
if (allow.contains(h)) {
FTRACE(kHintLevel, "{}hinting {} for %{} @ {}\n",
prefix, show(h), size_t(ivl->var->vreg), ivl->start());
return true;
}
FTRACE(kHintLevel, " found mismatched hint for %{} @ {}\n",
size_t(ivl->var->vreg), ivl->uses.front().pos);
return false;
};
auto const is_phi_use = !ivl->uses.empty() &&
ivl->uses.front().phi_group != kInvalidPhiGroup;
// If we have either a backwards or a forwards hint, return it---if we have
// both, return whichever is free the longest. For phi uses, however, we
// want to consider the whole phi group's allocations first.
if (!is_phi_use && check_and_trace(hint, "")) return hint;
// Try to determine a hint via the first phi use in the interval.
auto const phi_hints = tryPhiHint(variables, ivl, hint_info, free_until);
bool is_phi_hint = false;
if (phi_hints) {
hint = choose(phi_hints->first, hint);
if (hint != phi_hints->first) {
hint = choose(hint, phi_hints->second);
}
is_phi_hint = hint == phi_hints->first ||
hint == phi_hints->second;
}
if (phi_hints || is_phi_use) {
if (check_and_trace(hint, debug && is_phi_hint ? "phi-" : "")) {
return hint;
}
}
// Crawl through our uses and look for a physical hint.
for (auto const& u : ivl->uses) {
if (!u.hint.isValid() ||
!u.hint.isPhys() ||
!allow.contains(u.hint)) continue;
hint = choose(hint, u.hint);
}
if (hint != InvalidReg) {
FTRACE(kHintLevel, "physical hint {} for %{} @ {}\n",
show(hint), size_t(ivl->var->vreg), ivl->start());
}
return hint;
}
///////////////////////////////////////////////////////////////////////////////
// Register allocation.
/*
* Information about spills generated by register allocation.
*
* Used for the allocateSpillSpace() pass which inserts the instructions that
* create spill space on the stack.
*/
struct SpillInfo {
// Number of intervals spilled.
unsigned num_spills{0};
// Number of spill slots used.
size_t used_spill_slots{0};
};
/*
* Extended Linear Scan register allocator over vasm virtual registers (Vregs).
*
* This encapsulates the intermediate data structures used during the
* allocation phase of the algorithm so we don't have to pass them around
* everywhere.
*/
struct Vxls {
Vxls(const VxlsContext& ctx,
const jit::vector<Variable*>& variables,
const HintInfo& hint_info)
: ctx(ctx)
, variables(variables)
, hint_info(hint_info)
{}
SpillInfo go();
private:
void assignSpill(Interval* ivl);
void spill(Interval*);
void assignReg(Interval*, PhysReg);
unsigned nearestSplitBefore(unsigned pos);
unsigned constrain(Interval*, RegSet&);
void update(Interval*);
void allocate(Interval*);
void allocBlocked(Interval*);
void spillOthers(Interval* current, PhysReg r);
private:
/*
* Comparison function for pending priority queue.
*
* std::priority_queue requires a less operation, but sorts the heap
* highest-first; we need the opposite (lowest-first), so use greater-than.
*/
struct Compare {
bool operator()(const Interval* i1, const Interval* i2) {
return i1->start() > i2->start();
}
};
private:
const VxlsContext& ctx;
// Variables, null if unused.
const jit::vector<Variable*>& variables;
// Hint metadata.
const HintInfo& hint_info;
// Subintervals sorted by Interval start.
jit::priority_queue<Interval*,Compare> pending;
// Intervals that overlap.
jit::vector<Interval*> active, inactive;
// Last position each spill slot was owned; kMaxPos means currently used.
jit::array<unsigned, kMaxSpillSlots> spill_slots{{0}};
// Stats on spills.
SpillInfo spill_info;
};
SpillInfo Vxls::go() {
for (auto var : variables) {
if (!var) continue;
if (var->fixed()) {
assignReg(var->ivl(), var->vreg);
} else if (var->constant) {
spill(var->ivl());
} else {
pending.push(var->ivl());
}
}
while (!pending.empty()) {
auto current = pending.top();
pending.pop();
update(current);
allocate(current);
}
return spill_info;
}
/*
* Assign the next available spill slot to `ivl'.
*/
void Vxls::assignSpill(Interval* ivl) {
assertx(!ivl->fixed() && ivl != ivl->leader());
if (ivl->var->slot != kInvalidSpillSlot) return;
auto& used_spill_slots = spill_info.used_spill_slots;
auto const assign_slot = [&] (size_t slot) {
ivl->var->slot = slot;
++spill_info.num_spills;
spill_slots[slot] = kMaxPos;
if (!ivl->var->wide) {
used_spill_slots = std::max(used_spill_slots, slot + 1);
} else {
used_spill_slots = std::max(used_spill_slots, slot + 2);
spill_slots[slot + 1] = kMaxPos;
}
};
// Assign spill slots. We track the highest position at which a spill slot
// was owned, and only reassign it to a Vreg if its lifetime interval
// (including all splits) is strictly above that high water mark.
if (!ivl->var->wide) {
for (size_t slot = 0, n = spill_slots.size(); slot < n; ++slot) {
if (ivl->leader()->start() >= spill_slots[slot]) {
return assign_slot(slot);
}
}
} else {
for (size_t slot = 0, n = spill_slots.size() - 1; slot < n; slot += 2) {
if (ivl->leader()->start() >= spill_slots[slot] &&
ivl->leader()->start() >= spill_slots[slot + 1]) {
return assign_slot(slot);
}
}
}
// Ran out of spill slots.
ONTRACE(kVasmRegAllocDetailLevel,
dumpVariables(variables, spill_info.num_spills));
TRACE(1, "vxls-punt TooManySpills\n");
TRACE_PUNT("LinearScan_TooManySpills");
}
/*
* Assign `r' to `ivl'.
*/
void Vxls::assignReg(Interval* ivl, PhysReg r) {
if (!ivl->fixed() && ivl->uses.empty()) {
ivl->reg = InvalidReg;
if (!ivl->constant()) assignSpill(ivl);
} else {
ivl->reg = r;
active.push_back(ivl);
}
}
/*
* Spill `ivl' from its start until its first register use.
*
* Spill `ivl' if there is no use; otherwise split the interval just before the
* use, and enqueue the second part.
*/
void Vxls::spill(Interval* ivl) {
unsigned first_use = ivl->firstUse();
if (first_use <= ivl->end()) {
auto split_pos = nearestSplitBefore(first_use);
if (split_pos <= ivl->start()) {
// This can only happen if we need more registers than are available
// at a single position. It can happen at a phijmp or callargs.
TRACE(1, "vxls-punt RegSpill\n");
TRACE_PUNT("RegSpill"); // cannot split before first_use
}
pending.push(ivl->split(split_pos));
}
ivl->reg = InvalidReg;
if (!ivl->constant()) assignSpill(ivl);
}
/*
* Update the active and inactive lists for the start of `current'.
*/
void Vxls::update(Interval* current) {
auto const pos = current->start();
auto const free_spill_slot = [this] (Interval* ivl) {
assertx(!ivl->next);
auto slot = ivl->var->slot;
if (slot != kInvalidSpillSlot) {
if (ivl->var->wide) {
assertx(spill_slots[slot + 1]);
spill_slots[slot + 1] = ivl->end();
}
assertx(spill_slots[slot]);
spill_slots[slot] = ivl->end();
}
};
// Check for active/inactive intervals that have expired or which need their
// polarity flipped.
auto const update_list = [&] (jit::vector<Interval*>& target,
jit::vector<Interval*>& other,
bool is_active) {
auto end = target.end();
for (auto i = target.begin(); i != end;) {
auto ivl = *i;
if (pos >= ivl->end()) {
*i = *--end;
if (!ivl->next) free_spill_slot(ivl);
} else if (is_active ? !ivl->covers(pos) : ivl->covers(pos)) {
*i = *--end;
other.push_back(ivl);
} else {
i++;
}
}
target.erase(end, target.end());
};
update_list(active, inactive, true);
update_list(inactive, active, false);
}
/*
* Return the closest split position on or before `pos'.
*
* The result might be exactly on an edge, or in-between instruction positions.
*/
unsigned Vxls::nearestSplitBefore(unsigned pos) {
auto b = blockFor(ctx, pos);
auto range = ctx.block_ranges[b];
if (pos == range.start) return pos;
return (pos - 1) | 1;
}
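/*
* For illustration: for pos 24 in a block whose range starts at 20, this
* returns 23, the odd between-instructions position just below 24. If pos is
* exactly the block start, it is returned unchanged, so the split can land on
* the block boundary itself.
*/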
/*
* Constrain the allowable registers for `ivl' by inspecting uses.
*
* Returns the latest position for which `allow' (which we populate) is valid.
* We use this return value to fill the `free_until' PosVec in allocate()
* below. That data structure tracks the first position at which a register is
* /unavailable/, so it would appear that constrain()'s return value is
* off-by-one.
*
* In fact, it is not; we actually /need/ this position offsetting because of
* our leniency towards having uses at an Interval's end() position. If we
* fail to constrain on an end-position use, we must still split and spill.
* (In contrast, if we intersect with another Interval on an end position use,
* it's okay because SSA tells us that the conflict must be the other
* Interval's def position, and a use and a def at the same position don't
* actually conflict; see the fun ASCII diagram that adorns the definition of
* Interval).
*/
unsigned Vxls::constrain(Interval* ivl, RegSet& allow) {
auto const any = ctx.abi.unreserved() - ctx.abi.sf; // Any but not flags.
allow = ctx.abi.unreserved();
for (auto& u : ivl->uses) {
auto need = u.kind == Constraint::Simd ? ctx.abi.simdUnreserved :
u.kind == Constraint::Gpr ? ctx.abi.gpUnreserved :
u.kind == Constraint::Sf ? ctx.abi.sf :
any; // Any or CopySrc
if ((allow & need).empty()) {
// cannot satisfy constraints; must split before u.pos
return u.pos - 1;
}
allow &= need;
}
return kMaxPos;
}
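/*
* For illustration: an interval with a Gpr use followed by an Sf use at
* position p cannot satisfy both constraints at once (the register sets are
* disjoint), so constrain() returns p - 1 and allocate() treats registers as
* free only up to just before that use, forcing a split (or spill) there.
*/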
/*
* Return the next intersection point between `current' and `other', or kMaxPos
* if they never intersect.
*
* We assume that `other', if it represents an SSA variable, is not live at the
* start of `current'.
*
* Note that if two intervals intersect, the first point of intersection will
* always be the start of one of the intervals, because SSA ensures that a def
* dominates all uses, and hence all live ranges as well.
*/
unsigned nextIntersect(const Interval* current, const Interval* other) {
assertx(!current->fixed());
if (current == current->leader() &&
other == other->leader() &&
!other->fixed()) {
// Since `other' is an inactive Vreg interval, it cannot cover current's
// start, and `current' cannot cover other's start, since `other' started
// earlier. Therefore, SSA guarantees no intersection.
return kMaxPos;
}
if (current->end() <= other->start() ||
other->end() <= current->start()) {
// One ends before the other starts.
return kMaxPos;
}
// r1,e1 span all of current
auto r1 = current->ranges.begin();
auto e1 = current->ranges.end();
// r2,e2 span the tail of other that might intersect current
auto r2 = other->ranges.begin() + other->findRange(current->start());
auto e2 = other->ranges.end();
// search for the lowest position covered by current and other
for (;;) {
if (r1->start < r2->start) {
if (r2->start < r1->end) return r2->start;
if (++r1 == e1) return kMaxPos;
} else {
if (r1->start < r2->end) return r1->start;
if (++r2 == e2) return kMaxPos;
}
}
return kMaxPos;
}
void Vxls::allocate(Interval* current) {
// Map from PhysReg until the first position at which it is /not/ available.
PosVec free_until; // 0 by default
RegSet allow;
auto const conflict = constrain(current, allow);
// Mark regs that fit our constraints as free up until the point of conflict,
// unless they're owned by active intervals---then mark them used.
allow.forEach([&](PhysReg r) {
free_until[r] = conflict;
});
for (auto ivl : active) {
free_until[ivl->reg] = 0;
}
// Mark each reg assigned to an inactive interval as only free until the
// first position at which `current' intersects that interval.
for (auto ivl : inactive) {
auto r = ivl->reg;
if (free_until[r] == 0) continue;
auto until = nextIntersect(current, ivl);
free_until[r] = std::min(until, free_until[r]);
}
if (current->ranges.size() > 1) {
auto const b = blockFor(ctx, current->start());
auto const blk_range = ctx.block_ranges[b];
if (blk_range.end > current->ranges[0].end) {
// We're assigning a register to an interval with multiple ranges, but the
// vreg isn't live out of the first range. This means there's no connection
// between this range and any subsequent one, so we can safely break the
// interval after the first range without making things worse. On the other
// hand, it can make things better, e.g. by not assigning a constant to a
// register in an unlikely exit block and then holding it in a callee-saved
// reg across lots of unrelated code until it's used again in another
// unlikely exit block.
auto second = current->split(blk_range.end, false);
pending.push(second);
} else if (current->constant() &&
current->uses.size() &&
current->uses[0].pos >= blk_range.end) {
// we probably don't want to load a constant into a register
// at the start of a block where it's not used.
return spill(current);
}
}
// Choose a register to allocate to `current', either via a hint or by
// various heuristics.
auto const choose_reg = [&] {
auto const hint = chooseHint(variables, current,
hint_info, free_until, allow);
if (hint != InvalidReg &&
free_until[hint] >= current->end()) {
return hint;
}
auto ret = InvalidReg;
for (auto const r : free_until) {
ret = choose_closest_after(current->end(), free_until, ret, r);
}
return ret;
};
auto const r = choose_reg();
auto const pos = free_until[r];
if (pos >= current->end()) {
return assignReg(current, r);
}
if (pos > current->start()) {
// `r' is free for the first part of current.
auto const prev_use = current->lastUseBefore(pos);
DEBUG_ONLY auto min_split = std::max(prev_use, current->start() + 1);
assertx(min_split <= pos);
auto split_pos = nearestSplitBefore(pos);
if (split_pos > current->start()) {
if (prev_use && prev_use < split_pos) {
// If there are uses in previous blocks, but no uses between the start
// of the block containing `split_pos' and `split_pos' itself, we
// should split earlier; otherwise we'll need to insert moves/loads on
// the edge(s) into this block, which clearly can't be used since we're
// spilling before the first use. Might as well spill on a block
// boundary, as early as possible.
auto prev_range_idx = current->findRange(prev_use);
auto prev_range = &current->ranges[prev_range_idx];
if (prev_range->start <= prev_use && prev_range->end < split_pos) {
prev_range++;
}
if (prev_range->start > prev_use && prev_range->start < split_pos) {
split_pos = prev_range->start;
}
}
// Split `current'. We keep uses at the end of the first split because
// we know that `r' is free up to /and including/ that position.
auto second = current->split(split_pos, true /* keep_uses */);
pending.push(second);
// Try to find a register again. Since `current' is shorter, we might
// have a different hint, or a different heuristically-determined
// best-reg.
auto const r = choose_reg();
assertx(free_until[r] >= current->end());
return assignReg(current, r);
}
}
// Must spill `current' or another victim.
allocBlocked(current);
}
/*
* When all registers are in use, find a good interval (possibly `current') to
* split and spill.
*
* When an interval is split and the second part is spilled, possibly split the
* second part again before the next use-pos that requires a register, and
* enqueue the third part.
*/
void Vxls::allocBlocked(Interval* current) {
auto const cur_start = current->start();
RegSet allow;
auto const conflict = constrain(current, allow); // repeated from allocate
// Track the positions (a) at which each PhysReg is next used by any lifetime
// interval to which it's assigned (`used'), and (b) at which each PhysReg is
// next assigned to a value whose lifetime intersects `current' (`blocked').
PosVec used, blocked;
allow.forEach([&](PhysReg r) { used[r] = blocked[r] = conflict; });
// compute next use of active registers, so we can pick the furthest one
for (auto ivl : active) {
if (ivl->fixed()) {
blocked[ivl->reg] = used[ivl->reg] = 0;
} else {
auto use_pos = ivl->firstUseAfter(cur_start);
used[ivl->reg] = std::min(use_pos, used[ivl->reg]);
}
}
// compute next intersection/use of inactive regs to find what's free longest
for (auto ivl : inactive) {
auto const r = ivl->reg;
if (blocked[r] == 0) continue;
auto intersect_pos = nextIntersect(current, ivl);
if (intersect_pos == kMaxPos) continue;
if (ivl->fixed()) {
blocked[r] = std::min(intersect_pos, blocked[r]);
used[r] = std::min(blocked[r], used[r]);
} else {
auto use_pos = ivl->firstUseAfter(cur_start);
used[r] = std::min(use_pos, used[r]);
}
}
// Choose the best victim register(s) to spill---the one with first-use
// after, but closest to, the lifetime of `current'; or the one with farthest
// first-use if no such register exists.
auto r = find_closest_after(current->end(), used);
// If all other registers are used by their owning intervals before the first
// register-use of `current', then we have to spill `current'.
if (used[r] < current->firstUse()) {
return spill(current);
}
auto const block_pos = blocked[r];
if (block_pos < current->end()) {
// If /every/ usable register is assigned to a lifetime interval which
// intersects with `current', we have to split current before that point.
auto prev_use = current->lastUseBefore(block_pos);
DEBUG_ONLY auto min_split = std::max(prev_use, cur_start + 1);
auto max_split = block_pos;
assertx(cur_start < min_split && min_split <= max_split);
auto split_pos = nearestSplitBefore(max_split);
if (split_pos > current->start()) {
auto second = current->split(split_pos, true /* keep_uses */);
pending.push(second);
}
}
spillOthers(current, r);
assignReg(current, r);
}
/*
* Split and spill other intervals that conflict with `current' for register r,
* at current->start().
*
* If necessary, split the victims again before their first use position that
* requires a register.
*/
void Vxls::spillOthers(Interval* current, PhysReg r) {
auto const cur_start = current->start();
// Split `ivl' at `cur_start' and spill the second part. If `cur_start' is
// too close to ivl->start(), spill all of `ivl' instead.
auto const spill_after = [&] (Interval* ivl) {
auto const split_pos = nearestSplitBefore(cur_start);
auto const tail = split_pos <= ivl->start() ? ivl : ivl->split(split_pos);
spill(tail);
};
// Split and spill other active intervals after `cur_start'.
auto end = active.end();
for (auto i = active.begin(); i != end;) {
auto other = *i;
if (other->fixed() || r != other->reg) {
i++; continue;
}
*i = *--end;
spill_after(other);
}
active.erase(end, active.end());
// Split and spill any inactive intervals after `cur_start' if they intersect
// with `current'.
end = inactive.end();
for (auto i = inactive.begin(); i != end;) {
auto other = *i;
if (other->fixed() || r != other->reg) {
i++; continue;
}
auto intersect = nextIntersect(current, other);
if (intersect >= current->end()) {
i++; continue;
}
*i = *--end;
spill_after(other);
}
inactive.erase(end, inactive.end());
}
SpillInfo assignRegisters(const VxlsContext& ctx,
const jit::vector<Variable*>& variables,
const HintInfo& hint_info) {
return Vxls(ctx, variables, hint_info).go();
}
///////////////////////////////////////////////////////////////////////////////
// Lifetime continuity resolution.
/*
* A pair of source block number and successor index, used to identify an
* out-edge.
*/
using EdgeKey = std::pair<Vlabel,unsigned>;
struct EdgeHasher {
size_t operator()(EdgeKey k) const {
return size_t(k.first) ^ k.second;
}
};
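/*
* Illustrative example (not normative): the edge from block B3 to its second
* successor would be keyed as EdgeKey{Vlabel{3}, 1}; EdgeHasher simply xors
* the block number with the successor index.
*/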
/*
* Copies that are required at a given position or edge.
*
* The keys into the PhysReg::Map are the dests; the Interval*'s are the
* sources (nullptr if no copy is needed).
*/
using CopyPlan = PhysReg::Map<Interval*>;
/*
* Copy and spill points for resolving split lifetime intervals.
*
* After register allocation, some lifetime intervals may have been split, and
* their Vregs assigned to different physical registers or spill locations. We
* use this struct to track where we need to add moves to maintain continuity.
* (We also use it to resolve phis.)
*/
struct ResolutionPlan {
// Where to insert spills.
jit::hash_map<unsigned,CopyPlan> spills;
// Where to insert reg-reg moves, or spill and constant loads, between
// instructions (or in place of copy{} and friends).
jit::hash_map<unsigned,CopyPlan> copies;
// Copies and loads on edges (between blocks).
jit::hash_map<EdgeKey,CopyPlan,EdgeHasher> edge_copies;
};
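/*
* Illustrative sketch (hypothetical positions and registers): after
* allocation, a plan might record
*
*   resolution.spills[d + 1][r0] = ivl;       // store r0 to ivl's slot just
*                                             // after its def at position d
*   resolution.copies[p][r1] = src;           // before position p, copy or
*                                             // load `src' into r1
*   resolution.edge_copies[{b, i}][r2] = src; // ditto, on b's i-th out-edge
*/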
template<class CopyPlanT>
using void_req = typename std::enable_if<
std::is_same<CopyPlanT, CopyPlan>::value ||
std::is_same<CopyPlanT, const CopyPlan>::value
>::type;
/*
* Iteration helpers for reg-reg copies and for {const,spill}-to-reg loads.
*/
template<class CopyPlanT, class F>
void_req<CopyPlanT> for_each_copy(CopyPlanT& plan, F f) {
for (auto dst : plan) {
auto& ivl = plan[dst];
if (!ivl || !ivl->live()) continue;
f(dst, ivl);
}
}
template<class CopyPlanT, class F>
void_req<CopyPlanT> for_each_load(CopyPlanT& plan, F f) {
for (auto dst : plan) {
auto& ivl = plan[dst];
if (!ivl || ivl->live()) continue;
assertx(ivl->constant() || ivl->spilled());
f(dst, ivl);
}
}
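/*
* Illustrative usage (a sketch, not part of the allocator): the two visitors
* partition a CopyPlan by how each destination register gets its value.
*
*   for_each_copy(plan, [&] (PhysReg dst, const Interval* src) {
*     // `src' is live in src->reg; a reg-reg move src->reg -> dst suffices.
*   });
*   for_each_load(plan, [&] (PhysReg dst, const Interval* src) {
*     // `src' is a constant or spilled; materialize it into dst with an
*     // immediate load or a load from its spill slot.
*   });
*/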
/*
* Insert a spill after the def-position in `ivl'.
*
* There's only one such position, because of SSA.
*/
void insertSpill(const VxlsContext& ctx,
ResolutionPlan& resolution, Interval* ivl) {
auto DEBUG_ONLY checkPos = [&](unsigned pos) {
assertx(pos % 2 == 1);
DEBUG_ONLY auto const b = blockFor(ctx, pos);
DEBUG_ONLY auto const& range = ctx.block_ranges[b];
assertx(pos - 1 >= range.start && pos + 1 < range.end);
return true;
};
auto const spill = [&] (unsigned pos) {
assertx(checkPos(pos));
resolution.spills[pos][ivl->reg] = ivl; // store ivl->reg => ivl->slot
};
if (ivl->var->recordbasenativesps.empty()) {
spill(ivl->var->def_pos + 1);
} else {
for (auto const pos : ivl->var->recordbasenativesps) {
always_assert_flog(ivl->covers(pos),
"Spilling required before native sp set");
spill(pos + 1);
}
}
}
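/*
* Illustrative position arithmetic (assuming the even/odd numbering that
* checkPos() asserts): a def at even position d has its spill store recorded
* at the odd position d + 1, the gap before the next instruction, which
* insertCopies() later finds via resolution.spills.find(pos - 1).
*/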
/*
* Insert spills and copies that connect subintervals that were split
* between instructions.
*/
void resolveSplits(const VxlsContext& ctx,
const jit::vector<Variable*>& variables,
ResolutionPlan& resolution) {
for (auto var : variables) {
if (!var) continue;
auto i1 = var->ivl();
if (var->slot >= 0) insertSpill(ctx, resolution, i1);
for (auto i2 = i1->next; i2; i1 = i2, i2 = i2->next) {
auto const pos = i2->start();
if (i1->end() != pos) continue; // spans lifetime hole
if (i2->reg == InvalidReg) continue; // no load necessary
if (i2->reg == i1->reg) continue; // no copy necessary
auto const b = blockFor(ctx, pos);
auto const range = ctx.block_ranges[b];
if (pos % 2 == 0) {
// an even position requiring a copy must be on an edge
assertx(range.start == pos);
} else {
// odd position
assertx(pos > range.start); // implicit label position per block
if (pos + 1 == range.end) continue; // copy belongs on successor edge
assertx(!resolution.copies[pos][i2->reg]);
resolution.copies[pos][i2->reg] = i1;
}
}
}
}
/*
* Lower copyargs{} and copy{} into moveplans at the same position.
*/
void lowerCopies(Vunit& unit, const VxlsContext& ctx,
const jit::vector<Variable*>& variables,
ResolutionPlan& resolution) {
// Add a lifetime-resolving copy from `s' to `d'---without touching the
// instruction stream.
auto const lower = [&] (unsigned pos, Vreg s, Vreg d) {
auto v1 = variables[s];
auto v2 = variables[d];
assertx(v1 && v2);
assertx(v2->fixed() || v2->def_pos == pos); // ssa
auto i1 = v1->fixed() ? v1->ivl() : v1->ivlAtUse(pos);
auto i2 = v2->ivl();
if (i2->reg != i1->reg) {
assertx(!resolution.copies[pos][i2->reg]);
resolution.copies[pos][i2->reg] = i1;
}
};
for (auto b : ctx.blocks) {
auto pos = ctx.block_ranges[b].start;
for (auto& inst : unit.blocks[b].code) {
if (inst.op == Vinstr::copyargs) {
auto const& uses = unit.tuples[inst.copyargs_.s];
auto const& defs = unit.tuples[inst.copyargs_.d];
for (unsigned i = 0, n = uses.size(); i < n; ++i) {
lower(pos, uses[i], defs[i]);
}
inst = nop{};
} else if (inst.op == Vinstr::copy2) {
lower(pos, inst.copy2_.s0, inst.copy2_.d0);
lower(pos, inst.copy2_.s1, inst.copy2_.d1);
inst = nop{};
} else if (inst.op == Vinstr::copy) {
lower(pos, inst.copy_.s, inst.copy_.d);
inst = nop{};
}
pos += 2;
}
}
}
/*
* Return the dest tuple of the phidef in block `b'; the phidef must be the
* block's first instruction.
*/
Vtuple findPhiDefs(const Vunit& unit, Vlabel b) {
assertx(!unit.blocks[b].code.empty() &&
unit.blocks[b].code.front().op == Vinstr::phidef);
return unit.blocks[b].code.front().phidef_.defs;
}
/*
* Register copy resolutions for livein sets and phis.
*/
void resolveEdges(Vunit& unit, const VxlsContext& ctx,
const jit::vector<Variable*>& variables,
ResolutionPlan& resolution) {
auto const addPhiEdgeCopies = [&] (Vlabel block, Vlabel target,
uint32_t targetIndex,
const VregList& uses) {
auto const p1 = ctx.block_ranges[block].end - 2;
auto const& defs = unit.tuples[findPhiDefs(unit, target)];
for (unsigned i = 0, n = uses.size(); i < n; ++i) {
auto v1 = variables[uses[i]];
auto v2 = variables[defs[i]];
assertx(v1 && v2);
auto i1 = v1->fixed() ? v1->ivl() : v1->ivlAtUse(p1);
auto i2 = v2->ivl();
if (i2->reg != i1->reg) {
EdgeKey edge { block, targetIndex };
assertx(!resolution.edge_copies[edge][i2->reg]);
resolution.edge_copies[edge][i2->reg] = i1;
}
}
};
for (auto b1 : ctx.blocks) {
auto const p1 = ctx.block_ranges[b1].end - 2;
auto& block1 = unit.blocks[b1];
auto& inst1 = block1.code.back();
// Add resolutions for phis.
if (inst1.op == Vinstr::phijmp) {
auto const& phijmp = inst1.phijmp_;
auto const target = phijmp.target;
auto const& uses = unit.tuples[phijmp.uses];
addPhiEdgeCopies(b1, target, 0, uses);
inst1 = jmp{target};
}
auto const succlist = succs(block1);
// Add resolutions for livein sets.
for (unsigned i = 0, n = succlist.size(); i < n; i++) {
auto const b2 = succlist[i];
auto const p2 = ctx.block_ranges[b2].start;
forEach(ctx.livein[b2], [&] (Vreg vr) {
auto var = variables[vr];
if (var->fixed()) return;
Interval* i1 = nullptr;
Interval* i2 = nullptr;
for (auto ivl = var->ivl(); ivl && !(i1 && i2); ivl = ivl->next) {
if (ivl->covers(p1)) i1 = ivl;
if (ivl->covers(p2)) i2 = ivl;
}
// i2 can be unallocated if the tmp is a constant or is spilled.
if (i2->reg != InvalidReg && i2->reg != i1->reg) {
assertx((resolution.edge_copies[{b1,i}][i2->reg] == nullptr));
resolution.edge_copies[{b1,i}][i2->reg] = i1;
}
});
}
}
}
/*
* Walk through the variables list and account for all points where copies or
* spills need to be made.
*/
ResolutionPlan resolveLifetimes(Vunit& unit, const VxlsContext& ctx,
const jit::vector<Variable*>& variables) {
ResolutionPlan resolution;
resolveSplits(ctx, variables, resolution);
lowerCopies(unit, ctx, variables, resolution);
resolveEdges(unit, ctx, variables, resolution);
return resolution;
}
///////////////////////////////////////////////////////////////////////////////
// Operand renaming.
/*
* Visitor class for renaming registers.
*/
struct Renamer {
Renamer(const jit::vector<Variable*>& variables, unsigned pos)
: variables(variables)
, pos(pos)
{}
template <class T>
void imm(const T& /*r*/) {}
template<class R> void def(R& r) { rename(r); }
template<class D, class H> void defHint(D& dst, H) { rename(dst); }
template<class R> void use(R& r) { rename(r); }
template<class S, class H> void useHint(S& src, H) { rename(src); }
template<class R> void across(R& r) { rename(r); }
void across(Vptr& m) {
if (m.base.isValid()) rename(m.base);
if (m.index.isValid()) rename(m.index);
}
template<Width w> void across(Vp<w>& m) { across(static_cast<Vptr&>(m)); }
void def(RegSet) {}
void use(RegSet /*r*/) {}
void use(Vptr& m) {
if (m.base.isValid()) rename(m.base);
if (m.index.isValid()) rename(m.index);
}
template<Width w> void use(Vp<w>& m) { use(static_cast<Vptr&>(m)); }
void use(VcallArgsId) { always_assert(false && "vcall unsupported in vxls"); }
private:
void rename(Vreg8& r) { r = lookup(r, Constraint::Gpr); }
void rename(Vreg16& r) { r = lookup(r, Constraint::Gpr); }
void rename(Vreg32& r) { r = lookup(r, Constraint::Gpr); }
void rename(Vreg64& r) { r = lookup(r, Constraint::Gpr); }
void rename(VregDbl& r) { r = lookup(r, Constraint::Simd); }
void rename(Vreg128& r) { r = lookup(r, Constraint::Simd); }
void rename(VregSF& r) { r = RegSF{0}; }
void rename(Vreg& r) { r = lookup(r, Constraint::Any); }
void rename(Vtuple /*t*/) { /* phijmp+phidef handled by resolveEdges */ }
PhysReg lookup(Vreg vreg, Constraint kind) {
auto var = variables[vreg];
if (!var || vreg.isPhys()) return vreg;
PhysReg reg = var->ivlAtUse(pos)->reg;
assertx((kind == Constraint::Gpr && reg.isGP()) ||
(kind == Constraint::Simd && reg.isSIMD()) ||
(kind == Constraint::Sf && reg.isSF()) ||
(kind == Constraint::Any && reg != InvalidReg));
return reg;
}
private:
const jit::vector<Variable*>& variables;
unsigned pos;
};
/*
* Visit every virtual-register typed operand in `unit', and rename it to its
* assigned physical register.
*/
void renameOperands(Vunit& unit, const VxlsContext& ctx,
const jit::vector<Variable*>& variables) {
for (auto b : ctx.blocks) {
auto pos = ctx.block_ranges[b].start;
for (auto& inst : unit.blocks[b].code) {
Renamer renamer(variables, pos);
visitOperands(inst, renamer);
pos += 2;
}
}
ONTRACE(
kVasmRegAllocDetailLevel,
printVariables("after renaming operands", unit, ctx, variables);
);
}
///////////////////////////////////////////////////////////////////////////////
// Copy insertion.
/*
* Insert stores for `spills' (with spill space starting at `slots') into
* `code' before code[j], corresponding to XLS logical position `pos'.
*
* Updates `j' to refer to the same instruction after the code insertions.
*/
void insertSpillsAt(jit::vector<Vinstr>& code, unsigned& j,
const CopyPlan& spills, Optional<MemoryRef> slots,
unsigned pos) {
jit::vector<Vinstr> stores;
for (auto src : spills) {
auto ivl = spills[src];
if (!ivl) continue;
always_assert_flog(slots, "Spilling before native sp is set (pos {})",
pos);
auto slot = ivl->var->slot;
assertx(slot >= 0 && src == ivl->reg);
MemoryRef ptr{slots->r + slotOffset(slot)};
if (!ivl->var->wide) {
always_assert_flog(!src.isSF(), "Tried to spill %flags");
stores.emplace_back(store{src, ptr});
} else {
assertx(src.isSIMD());
stores.emplace_back(storeups{src, ptr});
}
}
insertCodeAt(code, j, stores, pos);
}
/*
* Insert reg-reg moves for `copies' into `code' before code[j], corresponding
* to XLS logical position `pos'.
*
* Updates `j' to refer to the same instruction after the code insertions.
*/
void insertCopiesAt(const VxlsContext& ctx,
jit::vector<Vinstr>& code, unsigned& j,
const CopyPlan& plan, unsigned pos) {
MovePlan moves;
jit::vector<Vinstr> copies;
for_each_copy(plan, [&] (PhysReg dst, const Interval* ivl) {
moves[dst] = ivl->reg;
});
auto const hows = doRegMoves(moves, ctx.tmp);
for (auto const& how : hows) {
if (how.m_kind == MoveInfo::Kind::Xchg) {
copies.emplace_back(copy2{how.m_src, how.m_dst, how.m_dst, how.m_src});
} else {
copies.emplace_back(copy{how.m_src, how.m_dst});
}
}
insertCodeAt(code, j, copies, pos);
}
/*
* Insert constant loads or loads from spill space---with spill space starting
* at `slots'---for `loads' into `code' before code[j], corresponding to XLS
* logical position `pos'.
*
* Updates `j' to refer to the same instruction after the code insertions.
*/
void insertLoadsAt(jit::vector<Vinstr>& code, unsigned& j,
const CopyPlan& plan, Optional<MemoryRef> slots,
unsigned pos) {
jit::vector<Vinstr> loads;
for_each_load(plan, [&] (PhysReg dst, const Interval* ivl) {
if (ivl->constant()) {
if (ivl->var->val.isUndef) return;
loads.push_back([&]() -> Vinstr {
switch (ivl->var->val.kind) {
case Vconst::Quad:
case Vconst::Double:
return ldimmq{uint64_t(ivl->var->val.val), dst};
case Vconst::Long:
return ldimml{int32_t(ivl->var->val.val), dst};
case Vconst::Byte:
return ldimmb{uint8_t(ivl->var->val.val), dst};
}
not_reached();
}());
} else if (ivl->spilled()) {
always_assert_flog(slots, "Reloading before native sp is set (pos {})",
pos);
MemoryRef ptr{slots->r + slotOffset(ivl->var->slot)};
if (!ivl->var->wide) {
loads.emplace_back(load{ptr, dst});
} else {
assertx(dst.isSIMD());
loads.emplace_back(loadups{ptr, dst});
}
}
});
insertCodeAt(code, j, loads, pos);
}
/*
* Mutate the Vinstr stream by inserting copies.
*
* This destroys the position numbering, so we can't use interval positions
* after this.
*/
void insertCopies(Vunit& unit, const VxlsContext& ctx,
const jit::vector<Variable*>& /*variables*/,
const ResolutionPlan& resolution) {
auto const getSlots = [&] (Optional<int> offset) {
Optional<MemoryRef> slots;
if (offset) slots = ctx.sp[*offset];
return slots;
};
// Insert copies inside blocks.
for (auto const b : ctx.blocks) {
auto& block = unit.blocks[b];
auto& code = block.code;
auto pos = ctx.block_ranges[b].start;
auto offset = ctx.spill_offsets[b];
for (unsigned j = 0; j < code.size(); j++, pos += 2) {
auto const slots = getSlots(offset);
// Spills, reg-reg moves, and loads of constant values or spill space all
// occur between instructions. Insert them in order.
auto s = resolution.spills.find(pos - 1);
if (s != resolution.spills.end()) {
insertSpillsAt(code, j, s->second, slots, pos - 1);
}
auto c = resolution.copies.find(pos - 1);
if (c != resolution.copies.end()) {
insertCopiesAt(ctx, code, j, c->second, pos - 1);
insertLoadsAt(code, j, c->second, slots, pos - 1);
}
// Insert copies and loads at instructions.
c = resolution.copies.find(pos);
if (c != resolution.copies.end()) {
insertCopiesAt(ctx, code, j, c->second, pos);
insertLoadsAt(code, j, c->second, slots, pos);
}
assertx(resolution.spills.count(pos) == 0);
if (code[j].op == Vinstr::recordbasenativesp) {
assert_flog(!offset, "Block B{} Instr {} initiailizes native SP, but "
"already initialized.", size_t(b), j);
offset = 0;
} else if (code[j].op == Vinstr::unrecordbasenativesp) {
assert_flog(offset, "Block B{} Instr {} uninitiailizes native SP, but "
"already uninitialized.", size_t(b), j);
assert_flog(*offset == 0, "Block B{} Instr {} uninitiailizes native "
"SP, but SP offset is non zero.", size_t(b), j);
offset = std::nullopt;
} else if (offset) {
*offset -= spEffect(unit, code[j], ctx.sp);
}
}
}
// insert copies on edges
for (auto const b : ctx.blocks) {
auto& block = unit.blocks[b];
auto succlist = succs(block);
if (succlist.size() == 1) {
// copies will go at end of b
auto const c = resolution.edge_copies.find({b, 0});
if (c != resolution.edge_copies.end()) {
auto& code = block.code;
unsigned j = code.size() - 1;
auto const pos = ctx.block_ranges[b].end - 1;
auto const offset = ctx.spill_offsets[succlist[0]];
auto const slots = getSlots(offset);
// We interleave copies and loads in `edge_copies', so here and below
// we process them separately: the reg-reg copies first, then the loads.
insertCopiesAt(ctx, code, j, c->second, pos);
insertLoadsAt(code, j, c->second, slots, pos);
}
} else {
// copies will go at start of successor
for (int i = 0, n = succlist.size(); i < n; i++) {
auto s = succlist[i];
auto const c = resolution.edge_copies.find({b, i});
if (c != resolution.edge_copies.end()) {
auto& code = unit.blocks[s].code;
unsigned j = 0;
auto const pos = ctx.block_ranges[s].start;
auto const offset = ctx.spill_offsets[s];
auto const slots = getSlots(offset);
insertCopiesAt(ctx, code, j, c->second, pos);
insertLoadsAt(code, j, c->second, slots, pos);
}
}
}
}
}
///////////////////////////////////////////////////////////////////////////////
/*
* Peephole cleanup pass.
*
* Remove no-op copy sequences before allocating spill space, since doing so
* might modify the CFG.
*/
void peephole(Vunit& unit, const VxlsContext& ctx) {
// Whether a Vinstr is a register swap.
auto const match_xchg = [] (Vinstr& i, Vreg& r0, Vreg& r1) {
if (i.op != Vinstr::copy2) return false;
r0 = i.copy2_.s0;
r1 = i.copy2_.s1;
return r0 == i.copy2_.d1 && r1 == i.copy2_.d0;
};
for (auto b : ctx.blocks) {
auto& code = unit.blocks[b].code;
for (int i = 0, n = code.size(); i + 1 < n; i++) {
Vreg r0, r1, r2, r3;
if (match_xchg(code[i], r0, r1) &&
match_xchg(code[i + 1], r2, r3) &&
((r0 == r2 && r1 == r3) || (r0 == r3 && r1 == r2))) {
// matched xchg+xchg that cancel each other
code[i] = nop{};
code[i + 1] = nop{};
i++;
}
}
auto end = std::remove_if(code.begin(), code.end(), [&](Vinstr& inst) {
return is_trivial_nop(inst) ||
inst.op == Vinstr::phidef; // we lowered it
});
code.erase(end, code.end());
}
}
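/*
* Illustrative instance of the xchg+xchg pattern removed above (hypothetical
* registers): two back-to-back swaps of the same pair cancel out.
*
*   copy2 {s0=%rax, s1=%rcx, d0=%rcx, d1=%rax}  // swap rax <-> rcx
*   copy2 {s0=%rcx, s1=%rax, d0=%rax, d1=%rcx}  // swap back
*
* Both become nop{} and are then erased along with any other trivial nops.
*/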
///////////////////////////////////////////////////////////////////////////////
// Spill space allocation.
/*
* SpillState is used by allocateSpillSpace() to decide where to allocate/free
* spill space. It represents the state of the spill space as a whole and is
* computed before each individual instruction.
*
* Order is important in this enum: it's only legal to transition to states
* with higher values, and states are merged using std::max().
*/
enum SpillState : uint8_t {
// State is uninitialized. All block in-states start here.
Uninit,
// Spill space is not currently possible; we must allocate spill space after
// this point.
NoSpillPossible,
// Spill space is not currently needed; it's safe to allocate spill space
// after this point.
NoSpill,
// Spill space is needed and must be allocated at or before this point.
NeedSpill,
};
/*
* SpillStates is used to hold in/out state for each block after the analysis
* pass of allocateSpillSpace().
*/
struct SpillStates {
SpillState in;
SpillState out;
bool changes;
bool hasIndirectFixup;
};
/*
* Returns true if spill space must be allocated before execution of this
* instruction. In order to keep things simple, we return true for any
* instruction that reads or writes sp.
*/
bool instrNeedsSpill(const Vunit& unit, const Vinstr& inst, PhysReg sp) {
// Implicit sp input/output.
if (spEffect(unit, inst, sp, false) != 0) return true;
auto foundSp = false;
visitDefs(unit, inst, [&] (Vreg r) { if (r == sp) foundSp = true; });
if (foundSp) return true;
visitUses(unit, inst, [&] (Vreg r) { if (r == sp) foundSp = true; });
return foundSp;
}
/*
* Return the required SpillState coming into inst. prevState must not be
* Uninit.
*/
SpillState instrInState(const Vunit& unit, const Vinstr& inst,
SpillState prevState, PhysReg sp) {
switch (prevState) {
case Uninit: break;
case NoSpillPossible:
if (inst.op == Vinstr::recordbasenativesp) return NoSpill;
return NoSpillPossible;
case NoSpill:
if (inst.op == Vinstr::unrecordbasenativesp) return NoSpillPossible;
if (instrNeedsSpill(unit, inst, sp)) return NeedSpill;
return NoSpill;
case NeedSpill:
if (inst.op == Vinstr::unrecordbasenativesp) return NoSpillPossible;
return NeedSpill;
}
always_assert(false);
}
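/*
* Illustrative transitions (assuming `inst' reads or writes sp only where
* noted):
*
*   NoSpillPossible --recordbasenativesp-->      NoSpill
*   NoSpill         --inst reading/writing sp--> NeedSpill
*   NeedSpill       --unrecordbasenativesp-->    NoSpillPossible
*
* Any other instruction leaves the state unchanged.
*/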
/*
* Merge src into dst, returning true iff dst was changed.
*/
bool mergeSpillStates(SpillState& dst, SpillState src) {
assertx(src != Uninit);
if (dst == src) return false;
// The only permitted merges result in a state with higher values. This
// holds because we can't merge a NoSpillPossible with a non-NoSpillPossible
// state, since that would mean a program point both can and can't have spills.
assertx(IMPLIES(src != NoSpillPossible, dst != NoSpillPossible));
auto const oldDst = dst;
dst = std::max(dst, src);
return dst != oldDst;
}
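/*
* Illustrative merge sequence (a sketch, not part of the pass): in-states only
* ever move to higher values, so the worklist below terminates.
*
*   SpillState in = Uninit;
*   mergeSpillStates(in, NoSpill);    // in == NoSpill,   returns true
*   mergeSpillStates(in, NeedSpill);  // in == NeedSpill, returns true
*   mergeSpillStates(in, NoSpill);    // in == NeedSpill, returns false
*/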
std::default_random_engine s_stressRand(0xfaceb00c);
std::uniform_int_distribution<int> s_stressDist(1,7);
/*
* If the current unit used any spill slots, allocate and free spill
* space where appropriate. Spill space is allocated right before it's
* needed and freed before any instruction that exits the unit, which
* is any block-ending instruction with no successors in the unit. The
* algorithm uses two passes:
*
* Analysis:
* - For each block in RPO:
* - Load in-state, which has been populated by at least one predecessor
* (or manually set to NoSpillPossible for the entry block).
* - Analyze each instruction in the block, determining what state the
* spill space must be in before executing it.
* - Record out-state for the block and propagate to successors. If this
* changes the in-state for any of them, enqueue them for (re)processing.
*
* Mutation:
* - For each block (we use RPO to only visit reachable blocks but order
* doesn't matter):
* - Inspect the block's in-state and out-state:
* - NoSpill in: Walk the block to see if we need to allocate spill space
* before any instructions.
* - NoSpill out: Allocate spill space on any edges to successors with
* NeedSpill in-states. Also walk block and check for deallocation of
* spill space.
* - NeedSpill out: If the block has no in-unit successors, free spill
* space before the block-end instruction.
*/
void allocateSpillSpace(Vunit& unit, const VxlsContext& ctx,
SpillInfo& spi) {
if (spi.used_spill_slots == 0) return;
Timer t(Timer::vasm_reg_alloc_spill, unit.log_entry);
// Make sure we always allocate spill space in multiples of 16 bytes, to keep
// alignment straightforward.
if (spi.used_spill_slots % 2) spi.used_spill_slots++;
FTRACE(1, "Allocating {} spill slots\n", spi.used_spill_slots);
auto const spillSize = safe_cast<int32_t>(slotOffset(spi.used_spill_slots));
// Pointer manipulation is traditionally done with lea; since lea doesn't
// modify flags, it's safe to insert even where flags might be live.
Vinstr alloc = lea{ctx.sp[-spillSize], ctx.sp};
Vinstr free = lea{ctx.sp[spillSize], ctx.sp};
jit::vector<uint32_t> rpoIds(unit.blocks.size());
for (uint32_t i = 0; i < ctx.blocks.size(); ++i) rpoIds[ctx.blocks[i]] = i;
jit::vector<SpillStates> states(unit.blocks.size(),
{Uninit, Uninit, false, false});
states[unit.entry].in = NoSpillPossible;
dataflow_worklist<uint32_t> worklist(unit.blocks.size());
worklist.push(0);
// Walk the blocks in rpo. At the end of each block, propagate its out-state
// to successors, adding them to the worklist if their in-state
// changes. Blocks may be visited multiple times if loops are present.
while (!worklist.empty()) {
auto const label = ctx.blocks[worklist.pop()];
auto const& block = unit.blocks[label];
auto const stateIn = states[label].in;
auto state = stateIn;
for (auto& inst : block.code) {
state = instrInState(unit, inst, state, ctx.sp);
if (state != stateIn) states[label].changes = true;
if (instrHasIndirectFixup(inst)) states[label].hasIndirectFixup = true;
}
states[label].out = state;
for (auto s : succs(block)) {
if (mergeSpillStates(states[s].in, state)) {
worklist.push(rpoIds[s]);
}
}
}
// Do a single mutation pass over the blocks.
for (auto const label : ctx.blocks) {
auto state = states[label];
auto& block = unit.blocks[label];
// Any block with a state change should be walked to check for allocation
// or free of spill space.
if (state.changes) {
auto curState = state.in;
for (auto it = block.code.begin(); it != block.code.end(); ++it) {
auto const prevState = curState;
curState = instrInState(unit, *it, curState, ctx.sp);
if (curState == prevState) continue;
if (prevState != NeedSpill && curState == NeedSpill) {
assertx(prevState != NoSpillPossible);
FTRACE(3, "alloc spill before {}: {}\n", label, show(unit, *it));
alloc.set_irctx(it->irctx());
it = block.code.insert(it, alloc);
++it;
} else if (prevState == NeedSpill &&
curState == NoSpillPossible) {
FTRACE(3, "free spill before {}: {}\n", label, show(unit, *it));
free.set_irctx(it->irctx());
it = block.code.insert(it, free);
++it;
}
}
}
// Allocate spill space on edges from a NoSpill out-state to a NeedSpill
// in-state.
auto const successors = succs(block);
for (auto s : successors) {
if (state.out != states[s].in) {
assertx(state.out != NoSpillPossible && states[s].in != NoSpillPossible);
auto const shouldAlloc = state.out == NoSpill;
auto& op = shouldAlloc ? alloc : free;
FTRACE(3, "{} spill on edge from {} -> {}\n",
shouldAlloc ? "alloc" : "free", label, s);
auto it = std::prev(block.code.end());
op.set_irctx(it->irctx());
block.code.insert(it, op);
}
}
// Any block with a NeedSpill out-state and no successors must free spill
// space right before the block-end instruction. We ignore trap so spill
// space is still allocated in core files.
if (state.out == NeedSpill && successors.empty() &&
block.code.back().op != Vinstr::trap) {
auto it = std::prev(block.code.end());
FTRACE(3, "free spill before {}: {}\n", label, show(unit, (*it)));
free.set_irctx(it->irctx());
block.code.insert(it, free);
}
if (isPrologue(unit.context->kind)) {
if (state.hasIndirectFixup) {
auto blockState = state.in;
for (auto it = block.code.begin(); it != block.code.end(); ++it) {
// Note that if an instruction at the start or end of a spill
// region has an indirect fixup, this loop does not account for it.
// This is not ideal, but currently no instruction that carries a
// fixup can start or end a spill region, so it is fine.
if (instrInState(unit, *it, blockState, ctx.sp) == NeedSpill &&
instrHasIndirectFixup(*it)) {
updateIndirectFixupBySpill(*it, spillSize);
}
}
}
}
}
}
///////////////////////////////////////////////////////////////////////////////
// Printing.
std::string Interval::toString() const {
std::ostringstream out;
auto delim = "";
if (reg != InvalidReg) {
out << show(reg);
delim = " ";
}
if (constant()) {
out << delim << folly::format("#{:08x}", var->val.val);
}
if (var->slot >= 0) {
out << delim << folly::format("[%sp+{}]", slotOffset(var->slot));
}
delim = "";
out << " [";
for (auto const& r : ranges) {
out << delim << folly::format("{}-{}", r.start, r.end);
delim = ",";
}
out << ") {";
delim = "";
for (auto const& u : uses) {
if (u.pos == var->def_pos) {
if (u.hint.isValid()) {
out << delim << "@" << u.pos << "=" << show(u.hint);
} else {
out << delim << "@" << u.pos << "=";
}
} else {
auto hint_delim = u.kind == Constraint::CopySrc ? "=?" : "=@";
if (u.hint.isValid()) {
out << delim << show(u.hint) << hint_delim << u.pos;
} else {
out << delim << hint_delim << u.pos;
}
}
delim = ",";
}
out << "}";
return out.str();
}
DEBUG_ONLY void dumpVariables(const jit::vector<Variable*>& variables,
unsigned num_spills) {
Trace::traceRelease("Spills %u\n", num_spills);
for (auto var : variables) {
if (!var || var->fixed()) continue;
Trace::traceRelease("%%%-2lu %s\n", size_t(var->vreg),
var->ivl()->toString().c_str());
for (auto ivl = var->ivl()->next; ivl; ivl = ivl->next) {
Trace::traceRelease(" %s\n", ivl->toString().c_str());
}
}
}
auto const ignore_reserved = !getenv("XLS_SHOW_RESERVED");
auto const collapse_fixed = !getenv("XLS_SHOW_FIXED");
enum Mode { Light, Heavy };
template<class Pred>
const char* draw(Variable* var, unsigned pos, Mode m, Pred covers) {
// Light Heavy
static const char* top[] = { (const char*)u8"\u2575", (const char*)u8"\u2579" };
static const char* bottom[] = { (const char*)u8"\u2577", (const char*)u8"\u257B" };
static const char* both[] = { (const char*)u8"\u2502", (const char*)u8"\u2503" };
static const char* empty[] = { " ", " " };
auto f = [&](unsigned position) {
if (!var) return false;
for (auto ivl = var->ivl(); ivl; ivl = ivl->next) {
if (covers(ivl, position)) return true;
}
return false;
};
auto s = f(pos);
auto d = pos % 2 == 1 ? s : f(pos + 1);
return ( s && !d) ? top[m] :
( s && d) ? both[m] :
(!s && d) ? bottom[m] :
empty[m];
}
DEBUG_ONLY void printInstr(std::ostringstream& str,
const Vunit& unit, const VxlsContext& ctx,
const jit::vector<Variable*>& variables,
const Vinstr& inst, Vlabel b) {
bool fixed_covers[2] = { false, false };
Variable* fixed = nullptr;
for (auto var : variables) {
if (!var) continue;
if (var->fixed()) {
if (ignore_reserved && !ctx.abi.unreserved().contains(var->vreg)) {
continue;
}
if (collapse_fixed) {
fixed = var; // can be any.
fixed_covers[0] |= var->ivl()->covers(inst.id);
fixed_covers[1] |= var->ivl()->covers(inst.id + 1);
continue;
}
}
str << " ";
str << draw(var, inst.id, Light, [&](Interval* child, unsigned p) {
return child->covers(p);
});
str << draw(var, inst.id, Heavy, [&](Interval* child, unsigned p) {
return child->usedAt(p);
});
}
str << " " << draw(fixed, inst.id, Heavy, [&](Interval*, unsigned p) {
assertx(p - inst.id < 2);
return fixed_covers[p - inst.id];
});
if (inst.id == ctx.block_ranges[b].start) {
str << folly::format(" B{: <3}", size_t(b));
} else {
str << " ";
}
str << folly::format(" {: <3} ", inst.id) << show(unit, inst) << "\n";
}
DEBUG_ONLY void printVariables(const char* caption,
const Vunit& unit, const VxlsContext& ctx,
const jit::vector<Variable*>& variables) {
std::ostringstream str;
str << "Intervals " << caption << " " << ctx.counter << "\n";
for (auto var : variables) {
if (!var) continue;
if (var->fixed()) {
if (ignore_reserved && !ctx.abi.unreserved().contains(var->vreg)) {
continue;
}
if (collapse_fixed) {
continue;
}
}
str << folly::format(" {: <2}", size_t(var->vreg));
}
str << " FX\n";
for (auto b : ctx.blocks) {
for (auto& inst : unit.blocks[b].code) {
printInstr(str, unit, ctx, variables, inst, b);
}
}
HPHP::Trace::traceRelease("%s\n", str.str().c_str());
}
///////////////////////////////////////////////////////////////////////////////
struct XLSStats {
size_t instrs{0}; // total instruction count after xls
size_t moves{0}; // number of movs inserted
size_t loads{0}; // number of loads inserted
size_t spills{0}; // number of spill-stores inserted
size_t consts{0}; // number of const-loads inserted
size_t total_copies{0}; // moves+loads+spills
};
void dumpStats(const Vunit& unit, const ResolutionPlan& resolution) {
XLSStats stats;
for (auto const& blk : unit.blocks) {
stats.instrs += blk.code.size();
}
for (auto const& kv : resolution.spills) {
auto const& spills = kv.second;
for (auto const r : spills) {
if (spills[r]) ++stats.spills;
}
}
auto const count_copies = [&] (const CopyPlan& plan) {
for_each_copy(plan, [&] (PhysReg, const Interval*) {
++stats.moves;
});
for_each_load(plan, [&] (PhysReg, const Interval* ivl) {
++(ivl->spilled() ? stats.loads : stats.consts);
});
};
for (auto const& kv : resolution.copies) count_copies(kv.second);
for (auto const& kv : resolution.edge_copies) count_copies(kv.second);
stats.total_copies = stats.moves + stats.loads + stats.spills;
FTRACE_MOD(
Trace::xls_stats, 1,
"XLS stats:\n"
" instrs: {}\n"
" moves: {}\n"
" loads: {}\n"
" spills: {}\n"
" consts: {}\n"
" total copies: {}\n",
stats.instrs, stats.moves, stats.loads, stats.spills,
stats.consts, stats.total_copies
);
if (auto entry = unit.log_entry) {
entry->setInt("xls_instrs", stats.instrs);
entry->setInt("xls_moves", stats.moves);
entry->setInt("xls_loads", stats.loads);
entry->setInt("xls_spills", stats.spills);
entry->setInt("xls_consts", stats.consts);
}
}
///////////////////////////////////////////////////////////////////////////////
}
void allocateRegistersWithXLS(Vunit& unit, const Abi& abi) {
Timer timer(Timer::vasm_reg_alloc, unit.log_entry);
auto const counter = s_counter.fetch_add(1, std::memory_order_relaxed);
assertx(check(unit));
assertx(checkNoCriticalEdges(unit));
assertx(checkNoSideExits(unit));
// Analysis passes.
VxlsContext ctx{abi};
ctx.blocks = sortBlocks(unit);
ctx.block_ranges = computePositions(unit, ctx.blocks);
ctx.spill_offsets = analyzeSP(unit, ctx.blocks, ctx.sp);
ctx.livein = computeLiveness(unit, ctx.abi, ctx.blocks);
ctx.counter = counter;
// Build lifetime intervals and analyze hints.
auto variables = buildIntervals(unit, ctx);
auto const hint_info = analyzeHints(unit, ctx, variables);
// Perform register allocation.
auto spill_info = assignRegisters(ctx, variables, hint_info);
ONTRACE(kVasmRegAllocDetailLevel,
dumpVariables(variables, spill_info.num_spills));
// Insert lifetime-resolving copies, spills, and rematerializations, and
// replace the Vreg operands in the Vinstr stream with the assigned PhysRegs.
auto const resolution = resolveLifetimes(unit, ctx, variables);
renameOperands(unit, ctx, variables);
insertCopies(unit, ctx, variables, resolution);
ONTRACE(kVasmRegAllocDetailLevel,
dumpVariables(variables, spill_info.num_spills);
printVariables("after inserting copies", unit, ctx, variables);
);
peephole(unit, ctx);
// Insert instructions for creating spill space.
allocateSpillSpace(unit, ctx, spill_info);
if (auto entry = unit.log_entry) {
entry->setInt("num_spills", spill_info.num_spills);
entry->setInt("used_spill_slots", spill_info.used_spill_slots);
}
printUnit(kVasmRegAllocLevel, "after vasm-xls", unit);
if (HPHP::Trace::moduleEnabled(Trace::xls_stats, 1) || unit.log_entry) {
dumpStats(unit, resolution);
}
// Free the variable metadata.
for (auto var : variables) {
if (!var) continue;
for (Interval* ivl = var->ivl()->next, *next = nullptr; ivl; ivl = next) {
next = ivl->next;
jit::destroy(ivl);
}
Variable::destroy(var);
}
}
///////////////////////////////////////////////////////////////////////////////
}