// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#include "Jit/bitvector.h"
#include "Jit/deopt.h"
#include "Jit/hir/analysis.h"
#include "Jit/hir/memory_effects.h"
#include "Jit/hir/optimization.h"
#include "Jit/hir/printer.h"
#include "Jit/hir/ssa.h"
#include "Jit/log.h"
#include <fmt/ostream.h>
#include <algorithm>
#include <queue>
#include <set>
#include <vector>
#define TRACE(...) JIT_LOGIF(g_debug_refcount, __VA_ARGS__)
// This file implements our reference count insertion pass. If this is your
// first time here, I recommend reading refcount_insertion.md first.
namespace jit {
namespace hir {
namespace {
// Borrow support, represented as a bit vector. The least significant
// AliasClass::kNumBits bits hold an AliasClass, and each remaining bit
// represents one Register. Only Registers involved in Phis (as inputs or
// outputs) can be used as borrow support; bits are assigned to Registers in
// Env's constructor.
//
// BorrowSupport starts out empty and must be initialized with a call to
// init(num_support_bits) before use.
class BorrowSupport {
static_assert(
AliasClass::kNumBits <= 64,
"AliasClass bits must fit in BitVector chunk");
public:
void clear() {
bits_.SetBitWidth(0);
}
void init(size_t num_support_bits) {
bits_.SetBitWidth(num_support_bits);
bits_.fill(0);
}
bool empty() const {
return bits_.IsEmpty();
}
bool intersects(const BorrowSupport& other) const {
return !(bits_ & other.bits_).IsEmpty();
}
bool intersects(AliasClass acls) const {
return (bits_.GetBitChunk(0) & acls.bits()) != 0;
}
bool intersects(size_t bit) const {
return bits_.GetBit(bit);
}
bool operator==(const BorrowSupport& other) const {
return bits_ == other.bits_;
}
bool operator!=(const BorrowSupport& other) const {
return !operator==(other);
}
const util::BitVector& bits() const {
return bits_;
}
void add(const BorrowSupport& other) {
bits_ |= other.bits_;
}
void add(AliasClass acls) {
bits_.SetBitChunk(0, bits_.GetBitChunk(0) | acls.bits());
}
void add(size_t bit) {
bits_.SetBit(bit, 1);
}
void remove(AliasClass acls) {
bits_.SetBitChunk(0, bits_.GetBitChunk(0) & ~acls.bits());
}
void remove(size_t bit) {
bits_.SetBit(bit, 0);
}
private:
util::BitVector bits_;
};
// The state of a live value, including arbitrarily many copies of the original
// Register (from instructions like Assign and CheckExc).
struct RegState {
RegState(Register* model) : model_{model} {
addCopy(model);
}
bool operator==(const RegState& other) const {
return model_ == other.model_ && copies_ == other.copies_ &&
kind_ == other.kind_ && support_ == other.support_;
}
bool operator!=(const RegState& other) const {
return !operator==(other);
}
// The model Register, or the original version that may or may not have been
// copied.
Register* model() const {
return model_;
}
// The most recently defined copy of the model, which may still be the model
// itself.
Register* current() const {
JIT_DCHECK(!copies_.empty(), "%s has no live copies", model_->name());
return copies_.back();
}
void addCopy(Register* copy) {
copies_.emplace_back(copy);
}
// Remove the given Register from the list of live copies, returning true iff
// there are now no more live copies.
bool killCopy(Register* copy) {
// The linear search and erase here assume that having more than a couple of
// copies of a value is rare.
auto it = std::find(copies_.begin(), copies_.end(), copy);
JIT_DCHECK(
it != copies_.end(),
"%s isn't a live copy of %s",
copy->name(),
model_->name());
copies_.erase(it);
return copies_.empty();
}
int numCopies() const {
return copies_.size();
}
Register* copy(size_t i) const {
JIT_DCHECK(i < copies_.size(), "Invalid index %d", i);
return copies_[i];
}
// Merge `from` into `this`.
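// Merging follows a simple lattice: Uncounted is the identity element,
// Borrowed merged with Owned gives Owned, and merging two Borrowed states
// unions their support.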
void merge(const RegState& from) {
if (kind() == from.kind()) {
// The two kinds are the same, so keep that in the merged result. For
// two borrowed references, merge their support.
if (isBorrowed()) {
support().add(from.support());
}
} else if (isUncounted()) {
// Merging Uncounted with anything else takes the other state.
*this = from;
} else if (from.isUncounted()) {
// As with the previous case, use what's already in this.
} else {
// The two states are different and neither is uncounted, so one is
// borrowed and one is owned. The merged result is owned.
setOwned();
}
}
RefKind kind() const {
return kind_;
}
bool isUncounted() const {
return kind_ == RefKind::kUncounted;
}
bool isBorrowed() const {
return kind_ == RefKind::kBorrowed;
}
bool isOwned() const {
return kind_ == RefKind::kOwned;
}
void setUncounted() {
kind_ = RefKind::kUncounted;
support_.clear();
}
void setBorrowed(size_t num_support_bits) {
kind_ = RefKind::kBorrowed;
support_.init(num_support_bits);
}
void setOwned() {
kind_ = RefKind::kOwned;
support_.clear();
}
BorrowSupport& support() {
JIT_DCHECK(isBorrowed(), "Value isn't borrowed");
return support_;
}
const BorrowSupport& support() const {
return const_cast<RegState*>(this)->support();
}
private:
Register* model_;
std::vector<Register*> copies_;
RefKind kind_{RefKind::kUncounted};
BorrowSupport support_;
};
// A map from model values to their RegState, implemented as a thin wrapper
// around std::unordered_map<> that calls modelReg() on keys by default.
//
// All live values are tracked, even if they aren't a reference counted type,
// in order to correctly populate deopt info.
class StateMap {
using map_t = std::unordered_map<Register*, RegState>;
public:
auto size() const {
return map_.size();
}
auto empty() const {
return map_.empty();
}
auto countModel(Register* model) const {
JIT_DCHECK(model == modelReg(model), "countModel given non-model reg");
return map_.count(model);
}
RegState& getModel(Register* model) {
JIT_DCHECK(model == modelReg(model), "getModel given non-model reg");
return map_get(map_, model);
}
const RegState& getModel(Register* model) const {
JIT_DCHECK(model == modelReg(model), "getModel given non-model reg");
return map_get(map_, model);
}
auto find(Register* reg) {
return map_.find(modelReg(reg));
}
auto find(Register* reg) const {
return map_.find(modelReg(reg));
}
auto findModel(Register* model) {
JIT_DCHECK(model == modelReg(model), "findModel given non-model reg");
return map_.find(model);
}
auto findModel(Register* model) const {
JIT_DCHECK(model == modelReg(model), "findModel given non-model reg");
return map_.find(model);
}
template <typename... Args>
auto emplace(Args&&... args) {
return map_.emplace(std::forward<Args>(args)...);
}
auto begin() {
return map_.begin();
}
auto begin() const {
return map_.begin();
}
auto end() {
return map_.end();
}
auto end() const {
return map_.end();
}
auto eraseModel(Register* model) {
JIT_DCHECK(model == modelReg(model), "eraseModel given non-model reg");
return map_.erase(model);
}
auto erase(map_t::iterator it) {
return map_.erase(it);
}
bool operator==(const StateMap& other) const {
return map_ == other.map_;
}
bool operator!=(const StateMap& other) const {
return !operator==(other);
}
private:
map_t map_;
};
// In- and out-states for a BasicBlock, populated during the analysis phase.
struct BlockState {
// For blocks with <= 1 predecessor: an empty map.
//
// For blocks with >1 predecessor: values that are live after any Phis at
// block entry, including the Phi outputs.
StateMap in;
// Values that are live before the final control flow instruction
// (CondBranch, CondBranchCheckType, etc.) or after the terminator (Return,
// Deopt, etc.).
StateMap out;
};
// For every Register that is an input to one or more Phis, a map from each
// predecessor block to the Phi outputs that the value contributes to along
// that edge.
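// For example, given an illustrative Phi `v5 = Phi<1, 2> v3 v4`, this maps
// modelReg(v3) -> {bb1 -> [v5]} and modelReg(v4) -> {bb2 -> [v5]}.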
using PhiUseMap = std::unordered_map<
Register*,
std::unordered_map<BasicBlock*, std::vector<Register*>>>;
struct RegisterLess {
bool operator()(const Register* a, const Register* b) const {
return a->id() < b->id();
}
};
struct RegStateLess {
bool operator()(const RegState* a, const RegState* b) const {
return RegisterLess{}(a->model(), b->model());
}
};
// Global state used by the analysis.
struct Env {
Env(Function& func) : func{func}, liveness{func} {
liveness.Run();
last_uses = liveness.GetLastUses();
// Visit each Phi to collect some metadata:
// - Assign a borrow support bit to any Register that is a Phi input or
// output.
// - Build up a map of values used by Phis, and the blocks they come from.
std::string bit_names;
auto add_support_bit = [&](Register* model) {
if (reg_to_bit.emplace(model, num_support_bits).second) {
if (g_debug_refcount) {
format_to(bit_names, " {} => {}\n", num_support_bits, *model);
}
num_support_bits++;
}
};
for (auto& block : func.cfg.blocks) {
block.forEachPhi([&](Phi& phi) {
auto output = phi.GetOutput();
add_support_bit(output);
for (int i = 0, n = phi.NumOperands(); i < n; ++i) {
auto model = modelReg(phi.GetOperand(i));
add_support_bit(model);
phi_uses[model][phi.basic_blocks()[i]].emplace_back(output);
}
});
}
TRACE("Support bits:\n%s", bit_names);
}
// State that is initialized during setup and is immutable during the pass
// itself:
Function& func;
// Liveness information, including which Registers die at each Instr.
LivenessAnalysis liveness;
LivenessAnalysis::LastUses last_uses;
// The number of bits in an initialized BorrowSupport, and the Register ->
// bit assignments.
size_t num_support_bits{AliasClass::kNumBits};
std::unordered_map<Register*, int> reg_to_bit;
// Information about Phi nodes, keyed by their input Registers.
PhiUseMap phi_uses;
// State that is initialized during the analysis phase and is unchanged
// during the mutation phase:
// In- and out-states for all blocks.
std::unordered_map<BasicBlock*, BlockState> blocks;
// Some functions are used by both the analysis and mutation phases and
// perform nearly identically between the two, so this is used as a flag to
// control the few behavioral differences that exist.
bool mutate{false};
// Transient state that is updated as instructions are processed:
// Unused Phi outputs are collected here, and dropped in bulk after the last
// Phi of the block.
std::vector<Register*> deferred_deaths;
// The state of all live Registers.
StateMap live_regs;
// All borrow support currently supporting live, borrowed Registers.
BorrowSupport borrow_support;
// All live registers that are currently supported by non-empty borrow
// support.
std::set<RegState*, RegStateLess> borrowed_regs;
};
struct PredState {
PredState(BasicBlock* block, const StateMap* state)
: block{block}, state{state} {}
BasicBlock* block;
const StateMap* state;
};
// Return a list of out-states for all visited predecessors of the given block,
// sorted by block id.
std::vector<PredState> collectPredStates(Env& env, BasicBlock* block) {
std::vector<PredState> preds;
for (auto edge : block->in_edges()) {
auto pred = edge->from();
auto it = env.blocks.find(pred);
if (it == env.blocks.end()) {
continue;
}
preds.emplace_back(pred, &it->second.out);
}
std::sort(preds.begin(), preds.end(), [](auto& p1, auto& p2) {
return p1.block->id < p2.block->id;
});
return preds;
}
// Return true iff the given Register is definitely not a reference-counted
// value.
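// This covers both non-object values and objects that are statically known
// to be immortal.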
bool isUncounted(const Register* reg) {
return !reg->type().couldBe(TMortalObject);
}
// Insert an Incref of `reg` before `cursor`.
void insertIncref(Env& env, Register* reg, Instr& cursor) {
JIT_DCHECK(env.mutate, "Attempt to insert incref with mutate == false");
JIT_DCHECK(!isUncounted(reg), "Attempt to incref an uncounted value");
Instr* incref;
if (reg->type() <= TObject) {
incref = Incref::create(reg);
} else {
incref = XIncref::create(reg);
}
incref->copyBytecodeOffset(cursor);
incref->InsertBefore(cursor);
TRACE(
"Inserted '%s' before '%s' in bb %d",
*incref,
cursor,
cursor.block()->id);
}
// Insert a Decref or XDecref of `reg`, depending on its type, before `cursor`.
void insertDecref(Env& env, Register* reg, Instr& cursor) {
JIT_DCHECK(env.mutate, "Attempt to insert decref with mutate == false");
JIT_DCHECK(!isUncounted(reg), "Attempt to decref an uncounted value");
Instr* decref;
if (reg->type() <= TObject) {
decref = Decref::create(reg);
} else {
decref = XDecref::create(reg);
}
decref->copyBytecodeOffset(cursor);
decref->InsertBefore(cursor);
TRACE(
"Inserted '%s' before '%s' in bb %d",
*decref,
cursor,
cursor.block()->id);
}
// If the given RegState is borrowed with non-empty support, track it in env.
void registerBorrowSupport(Env& env, RegState& rstate) {
if (!rstate.isBorrowed() || rstate.support().empty()) {
return;
}
env.borrow_support.add(rstate.support());
env.borrowed_regs.emplace(&rstate);
}
// Invalidate the borrow support represented by either a bit index or an
// AliasClass, updating live value state and inserting Increfs to promote
// values to owned as appropriate.
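// During the analysis phase this only updates the tracked state; during the
// mutation phase an Incref is also inserted before `cursor` for each value
// that gets promoted to owned.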
template <typename Support>
void invalidateBorrowSupport(Env& env, Instr& cursor, Support support) {
if (!env.borrow_support.intersects(support)) {
return;
}
for (auto it = env.borrowed_regs.begin(); it != env.borrowed_regs.end();) {
auto rstate = *it;
JIT_DCHECK(
rstate->isBorrowed(),
"Non-borrowed state in borrowed_regs: %s",
*rstate);
if (rstate->support().intersects(support)) {
rstate->setOwned();
if (env.mutate) {
insertIncref(env, rstate->current(), cursor);
}
it = env.borrowed_regs.erase(it);
} else {
++it;
}
}
env.borrow_support.remove(support);
}
// Kill a Register that has died after its last use. If that Register was the
// last live copy of its model, untrack it, and if we owned a reference to it,
// insert a Decref.
void killRegisterImpl(
Env& env,
RegState& rstate,
Register* copy,
Instr& cursor) {
TRACE("Killing %s from %s", *copy, rstate);
if (!rstate.killCopy(copy)) {
// There are copies of this value still live.
return;
}
Register* model = rstate.model();
if (rstate.isOwned()) {
// Before killing our owned reference, check for anyone borrowing from us.
auto bit_it = env.reg_to_bit.find(model);
if (bit_it != env.reg_to_bit.end()) {
invalidateBorrowSupport(env, cursor, bit_it->second);
}
// Invalidate all managed-memory-backed borrow support, for two reasons:
// 1. The Decref we're going to insert here can run arbitrary code in the
// destructor.
// 2. The value we're losing a reference to could be a container supporting
// a borrowed value.
// It's possible to do better in the future on both of these points, with
// more complexity.
invalidateBorrowSupport(env, cursor, AManagedHeapAny);
if (env.mutate) {
insertDecref(env, copy, cursor);
}
}
env.borrowed_regs.erase(&rstate);
env.live_regs.eraseModel(model);
}
// Kill a list of registers that have died, in an order that is predictable and
// avoids unnecessary promotions from borrowed to owned.
void killRegisters(
Env& env,
const std::vector<Register*>& regs,
Instr& cursor) {
struct RegCopyState {
RegCopyState(Register* copy, RegState* rstate)
: copy(copy), rstate(rstate) {}
Register* copy;
RegState* rstate;
};
std::vector<RegCopyState> rstates;
rstates.reserve(regs.size());
for (Register* reg : regs) {
rstates.emplace_back(reg, &map_get(env.live_regs, reg));
}
auto rstate_lt = [](RegCopyState& a, RegCopyState& b) {
bool a_borrowed = a.rstate->isBorrowed();
bool b_borrowed = b.rstate->isBorrowed();
// Put borrowed registers before all others, and sort by register number
// within each group.
return (a_borrowed && !b_borrowed) ||
(a_borrowed == b_borrowed && RegStateLess{}(a.rstate, b.rstate));
};
std::sort(rstates.begin(), rstates.end(), rstate_lt);
for (RegCopyState& rcs : rstates) {
killRegisterImpl(env, *rcs.rstate, rcs.copy, cursor);
}
}
// Copy the given state into env, and re-initialize borrow support tracking
// from the new live values.
void useInState(Env& env, const StateMap& state) {
env.live_regs = state;
env.borrow_support.init(env.num_support_bits);
env.borrowed_regs.clear();
for (auto& pair : env.live_regs) {
registerBorrowSupport(env, pair.second);
}
}
// For a block with 0 or 1 predecessors, compute and activate its in-state. For
// the entry block, this is an empty map. For 1-predecessor blocks, it's a copy
// of the predecessor's out-state with adjustments for a CondBranch* in the
// predecessor and/or registers that died across the edge.
void useSimpleInState(Env& env, BasicBlock* block) {
if (block->in_edges().empty()) {
useInState(env, StateMap{});
return;
}
JIT_DCHECK(
block->in_edges().size() == 1,
"Only blocks with <= 1 predecessors are supported");
JIT_DCHECK(
!block->front().IsPhi(),
"Phis in a single-predecessor block are unsupported");
BasicBlock* pred = (*block->in_edges().begin())->from();
useInState(env, map_get(env.blocks, pred).out);
// First, adjust for a conditional branch, if any, in the predecessor.
Instr* term = pred->GetTerminator();
if (term->IsCondBranch() || term->IsCondBranchIterNotDone()) {
auto cond = static_cast<CondBranchBase*>(term);
// The operand of the CondBranch is uncounted coming out of the false edge:
// for CondBranch it's nullptr, and for CondBranchIterNotDone it's an
// immortal sentinel.
if (block == cond->false_bb()) {
Register* reg = cond->GetOperand(0);
map_get(env.live_regs, reg).setUncounted();
}
} else if (term->IsCondBranchCheckType()) {
// PyWaitHandleObject is an uncounted singleton, so we adjust its reference
// state here to avoid refcounting it.
auto cond = static_cast<CondBranchCheckType*>(term);
if (cond->type() == TWaitHandle) {
if (block == cond->true_bb()) {
Register* reg = cond->GetOperand(0);
map_get(env.live_regs, reg).setUncounted();
}
}
}
// Second, kill any registers that die across the edge.
RegisterSet live_in = env.liveness.GetIn(block);
std::vector<Register*> dying_values;
for (auto& pair : env.live_regs) {
RegState& rstate = pair.second;
for (int i = rstate.numCopies() - 1; i >= 0; --i) {
Register* reg = rstate.copy(i);
if (!live_in.count(reg)) {
dying_values.emplace_back(reg);
}
}
}
killRegisters(env, dying_values, block->front());
}
// The first time we see a block with multiple predecessors, populate its
// in-state with all live-in registers and Phi outputs, with their copy lists
// appropriately initialized.
void initializeInState(
BasicBlock* block,
StateMap& in_state,
const RegisterSet& live_in,
const StateMap& pred_state) {
for (auto current : live_in) {
auto model = modelReg(current);
auto in_pair = in_state.emplace(model, model);
if (!in_pair.second) {
// We already processed this value with a copy we saw earlier.
continue;
}
auto& rstate = in_pair.first->second;
// Clear the list of copies since we're initializing it manually.
rstate.killCopy(model);
// Using an arbitrary predecessor to get definition order, insert any
// copies that are still live into this block.
auto& pred_rstate = pred_state.getModel(model);
for (int i = 0, n = pred_rstate.numCopies(); i < n; ++i) {
auto copy = pred_rstate.copy(i);
if (live_in.count(copy)) {
rstate.addCopy(copy);
}
}
}
block->forEachPhi([&](Phi& phi) {
auto inserted = in_state.emplace(phi.GetOutput(), phi.GetOutput()).second;
JIT_DCHECK(inserted, "Register shouldn't exist in map yet");
});
}
// Return true iff the given register is live into the given block, in the
// given in-state. Phi outputs are not live into the block they're defined in,
// even though they appear in the in-state.
bool isLiveIn(BasicBlock* block, Register* reg, const StateMap& in_state) {
if (reg->instr()->IsPhi() && reg->instr()->block() == block) {
return false;
}
return in_state.countModel(reg);
}
struct PhiInput {
PhiInput(BasicBlock* block, const RegState* rstate)
: block{block}, rstate{rstate} {}
BasicBlock* block;
const RegState* rstate;
};
// Return a list of predecessor blocks paired with the RegState for the value
// they provide to the given Phi. This relies on the output of
// collectPredStates() being sorted in the same order as Phi::basic_blocks, by
// block id.
std::vector<PhiInput> collectPhiInputs(
const std::vector<PredState>& preds,
const Phi& phi) {
std::vector<PhiInput> inputs;
auto preds_it = preds.begin();
for (std::size_t phi_idx = 0; preds_it != preds.end(); ++phi_idx) {
auto& pred = *preds_it;
if (phi.basic_blocks().at(phi_idx) != pred.block) {
// This predecessor hasn't been processed yet.
continue;
}
auto input = phi.GetOperand(phi_idx);
inputs.emplace_back(pred.block, &map_get(*pred.state, input));
++preds_it;
}
JIT_DCHECK(!inputs.empty(), "Processing block with no visited predecessors");
return inputs;
}
// Information about Phi instructions: a set of owned Phi inputs that aren't
// separately live into the block, and a map of which Phi outputs those dead
// inputs could forward their owned reference to.
//
// Used to modify the support of values borrowed from the dead inputs, so we
// only borrow references from live values.
struct PhiSupport {
PhiSupport(size_t support_bits) {
dead.init(support_bits);
}
BorrowSupport dead;
std::unordered_map<size_t, BorrowSupport> forwards;
};
// For each Phi in the given block, inspect the state of all incoming values
// and decide on a merged state for the Phi's output.
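// As an illustrative example (not real pass output), consider
//   v2 = Phi<0, 1> v0 v1
// where v0 is owned and v1 is borrowed. If both inputs are live into the
// block, v2 is borrowed with support containing v0's bit plus v1's support.
// If v0 dies at the Phi, its owned reference is forwarded to v2 instead and
// v2 becomes owned.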
PhiSupport processPhis(
Env& env,
BasicBlock* block,
const std::vector<PredState>& preds,
StateMap& in_state) {
PhiSupport support_info(env.num_support_bits);
for (auto& instr : *block) {
if (!instr.IsPhi()) {
break;
}
auto& phi = static_cast<Phi&>(instr);
auto output = phi.GetOutput();
auto& rstate = in_state.getModel(output);
// No more analysis is needed if the value isn't refcounted, or if it's
// already owned.
if (isUncounted(output) || rstate.isOwned()) {
continue;
}
auto inputs = collectPhiInputs(preds, phi);
// Dead phi inputs with an owned reference force the phi output to be
// owned. We also keep track of which Phi outputs these owned references
// are forwarded into, so borrow support that depends on the now-dead
// registers can be updated.
bool promote_output = false;
for (PhiInput& input : inputs) {
Register* model = input.rstate->model();
if (!isLiveIn(block, model, in_state) && input.rstate->isOwned()) {
promote_output = true;
size_t model_bit = map_get(env.reg_to_bit, model);
support_info.dead.add(model_bit);
auto forward_pair =
support_info.forwards.emplace(model_bit, BorrowSupport{});
if (forward_pair.second) {
forward_pair.first->second.init(env.num_support_bits);
}
forward_pair.first->second.add(map_get(env.reg_to_bit, output));
TRACE("Forwarding support from dead %s to %s", *model, *output);
}
}
if (promote_output) {
rstate.setOwned();
continue;
}
// Otherwise, the phi's output is borrowed from its owned inputs and the
// borrow support of its borrowed inputs.
rstate.setBorrowed(env.num_support_bits);
for (auto& input : inputs) {
if (input.rstate->isOwned()) {
rstate.support().add(map_get(env.reg_to_bit, input.rstate->model()));
} else if (input.rstate->isBorrowed()) {
// TODO(bsimmers): If this input gets promoted to owned because of a
// loop, the borrow support we add here will be redundant and could lead
// to worse code. We should revisit this at some point, but it's never
// incorrect to add more borrow support and fixing this gets messy.
rstate.support().add(input.rstate->support());
}
}
}
return support_info;
}
// Update the in-state for the given block, leaving the result in both
// env.live_regs and env.blocks[block].in.
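// Predecessors that haven't been visited yet (e.g. back edges on the first
// pass) contribute nothing here; the analysis worklist revisits this block
// once their out-states become available.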
void updateInState(Env& env, BasicBlock* block) {
if (block->in_edges().size() <= 1) {
useSimpleInState(env, block);
return;
}
auto preds = collectPredStates(env, block);
auto live_in = env.liveness.GetIn(block);
auto block_pair = env.blocks.emplace(
std::piecewise_construct,
std::forward_as_tuple(block),
std::forward_as_tuple());
auto& in_state = block_pair.first->second.in;
if (block_pair.second) {
initializeInState(block, in_state, live_in, *preds[0].state);
}
PhiSupport phi_support = processPhis(env, block, preds, in_state);
for (auto& pair : in_state) {
auto& rstate = pair.second;
auto model = rstate.model();
if (isUncounted(rstate.current()) || rstate.isOwned()) {
continue;
}
if (!(model->instr()->IsPhi() && model->instr()->block() == block)) {
for (auto& pred : preds) {
rstate.merge(pred.state->getModel(model));
if (rstate.isOwned()) {
break;
}
}
}
// If the value is borrowed from one or more now-dead Phi inputs, change it
// to borrow from the corresponding Phi output(s) instead.
if (rstate.isBorrowed() && rstate.support().intersects(phi_support.dead)) {
for (const auto& pair : phi_support.forwards) {
if (rstate.support().intersects(pair.first)) {
rstate.support().remove(pair.first);
rstate.support().add(pair.second);
}
}
}
}
useInState(env, in_state);
}
// If the given instruction can deopt, fill in its live registers.
void fillDeoptLiveRegs(const StateMap& live_regs, Instr& instr) {
auto deopt = dynamic_cast<DeoptBase*>(&instr);
if (deopt == nullptr) {
return;
}
for (auto& pair : live_regs) {
auto& rstate = pair.second;
auto ref_kind = rstate.kind();
for (int i = 0, n = rstate.numCopies(); i < n; ++i) {
Register* reg = rstate.copy(i);
deopt->emplaceLiveReg(reg, ref_kind, deoptValueKind(reg->type()));
if (ref_kind == RefKind::kOwned) {
// Treat anything other than the first copy as borrowed, to avoid
// over-decrefing it. We can probably do better in the future by
// ensuring that we only ever have one copy of each value in the
// FrameState/live regs, but that's a more disruptive change.
ref_kind = RefKind::kBorrowed;
}
}
}
}
// If the given instruction is a yield, record the registers that are
// currently live, split into owned and unowned sets. The owned values may be
// used in GC operations like deallocate and traverse while generator
// execution is suspended.
void fillYieldLiveRegs(const StateMap& live_regs, Instr& instr) {
auto yield = dynamic_cast<YieldBase*>(&instr);
if (yield == nullptr) {
return;
}
for (auto& pair : live_regs) {
auto& rstate = pair.second;
auto kind = rstate.kind();
if (kind == RefKind::kOwned) {
yield->emplaceLiveOwnedReg(rstate.copy(0));
} else {
yield->emplaceLiveUnownedReg(rstate.copy(0));
}
}
}
// Process any operands stolen by the given instruction.
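// If we own a reference to a stolen operand and this is its last use, our
// reference is forwarded to the instruction. Otherwise, during the mutation
// phase, an Incref is inserted so the instruction has a reference to steal.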
void stealInputs(
Env& env,
Instr& instr,
const util::BitVector& stolen_inputs,
const RegisterSet& dying_regs) {
if (stolen_inputs.GetPopCount() == 0) {
return;
}
for (int i = 0, n = instr.NumOperands(); i < n; ++i) {
if (!stolen_inputs.GetBit(i)) {
continue;
}
auto reg = instr.GetOperand(i);
auto& rstate = map_get(env.live_regs, reg);
if (rstate.isOwned() && dying_regs.count(reg)) {
// This instruction is the last use of reg and we own a reference to it,
// so forward the reference to the instruction. Mark the value as
// borrowed to avoid forwarding this reference more than once in this
// loop; it will be killed later in processInstr().
rstate.setBorrowed(env.num_support_bits);
continue;
}
if (env.mutate && !rstate.isUncounted()) {
insertIncref(env, reg, instr);
}
}
}
// Track the output of the given instruction.
void processOutput(Env& env, const Instr& instr, const MemoryEffects& effects) {
auto output = instr.GetOutput();
if (output == nullptr) {
return;
}
if (isPassthrough(instr)) {
auto& rstate = map_get(env.live_regs, output);
rstate.addCopy(output);
if (isUncounted(output)) {
rstate.setUncounted();
}
return;
}
auto pair = env.live_regs.emplace(output, output);
JIT_DCHECK(pair.second, "Register %s already defined", output->name());
auto& rstate = pair.first->second;
if (isUncounted(output)) {
// Do nothing. rstate is already Uncounted by default.
} else if (effects.borrows_output) {
rstate.setBorrowed(env.num_support_bits);
rstate.support().add(effects.borrow_support);
registerBorrowSupport(env, rstate);
} else {
rstate.setOwned();
}
}
// Process the given instruction: handle its memory effects, stolen inputs,
// output, and any registers that die after it.
void processInstr(Env& env, Instr& instr) {
JIT_DCHECK(
!instr.IsIncref() && !instr.IsDecref() && !instr.IsXDecref() &&
!instr.IsSnapshot(),
"Unsupported instruction %s",
instr.opname());
if (instr.numEdges() > 0) {
// Branches are handled outside the main loop.
return;
}
auto last_uses_it = env.last_uses.find(&instr);
auto& dying_regs =
last_uses_it == env.last_uses.end() ? kEmptyRegSet : last_uses_it->second;
TRACE("Processing '%s' with state:\n%s", instr, env.live_regs);
if (!dying_regs.empty()) {
TRACE("dying_regs: %s", dying_regs);
}
if (instr.IsPhi()) {
// If a Phi output is unused, it will die immediately after the Phi that
// defines it. It's illegal to insert a Decref between Phis, so we collect
// any such Registers to Decref together after the last Phi in the block.
if (!dying_regs.empty()) {
JIT_DCHECK(dying_regs.size() == 1, "Multiple regs dying after Phi");
auto output = instr.GetOutput();
JIT_DCHECK(
*dying_regs.begin() == output, "Unexpected value dying after Phi");
env.deferred_deaths.emplace_back(output);
}
auto& next = *std::next(instr.block()->iterator_to(instr));
if (!next.IsPhi() && !env.deferred_deaths.empty()) {
killRegisters(env, env.deferred_deaths, next);
env.deferred_deaths.clear();
}
return;
}
auto effects = memoryEffects(instr);
invalidateBorrowSupport(env, instr, effects.may_store);
stealInputs(env, instr, effects.stolen_inputs, dying_regs);
if (instr.IsReturn()) {
JIT_DCHECK(env.live_regs.size() == 1, "Unexpected live value(s) at Return");
JIT_DCHECK(
!map_get(env.live_regs, instr.GetOperand(0)).isOwned(),
"Return operand should not be owned at exit");
return;
}
if (env.mutate) {
fillDeoptLiveRegs(env.live_regs, instr);
fillYieldLiveRegs(env.live_regs, instr);
}
if (instr.IsTerminator()) {
return;
}
processOutput(env, instr, effects);
auto& next_instr = *std::next(instr.block()->iterator_to(instr));
killRegisters(env, {dying_regs.begin(), dying_regs.end()}, next_instr);
}
// When leaving a block with one successor, insert any Increfs necessary to
// transition to the target state.
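// As an illustrative example: if a value leaves this block owned, is
// borrowed in the successor's in-state, and also feeds two Phi outputs that
// are owned, this edge needs -1 + 2 = 1 Incref.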
void exitBlock(Env& env, const Edge* out_edge) {
auto block = out_edge->from();
auto succ = out_edge->to();
if (succ->in_edges().size() == 1) {
// No reconciliation is needed on 1:1 edges.
return;
}
auto const& from_regs = env.live_regs;
auto const& to_regs = map_get(env.blocks, succ).in;
TRACE("Reconciling to in-state for bb %d:\n%s", succ->id, to_regs);
// Count the number of increfs we need for each value.
std::vector<std::pair<Register*, int>> reg_increfs;
for (auto& pair : from_regs) {
auto model = pair.first;
auto& from_rstate = pair.second;
if (from_rstate.isUncounted()) {
// Sending an uncounted value to any other state never needs an
// adjustment.
continue;
}
bool to_owned = [&] {
if (!isLiveIn(succ, model, to_regs)) {
return false;
}
return to_regs.getModel(model).isOwned();
}();
// Start by calculating the number of increfs needed to reconcile the out
// state to the in state. This may begin as -1 if the out state is Owned
// and the in state isn't, in which case the outgoing owned reference will
// be transferred to a Phi dest.
int increfs = to_owned - from_rstate.isOwned();
// Add an incref for every time this value is passed to an owned Phi output.
auto phi_it = env.phi_uses.find(model);
if (phi_it != env.phi_uses.end()) {
auto block_it = phi_it->second.find(block);
if (block_it != phi_it->second.end()) {
for (auto phi_output : block_it->second) {
increfs += to_regs.getModel(phi_output).isOwned();
}
}
}
if (increfs > 0) {
reg_increfs.emplace_back(from_rstate.current(), increfs);
} else {
JIT_DCHECK(increfs == 0, "Invalid state transition");
}
}
std::sort(reg_increfs.begin(), reg_increfs.end(), [](auto& a, auto& b) {
return RegisterLess{}(a.first, b.first);
});
auto& cursor = block->back();
for (auto& pair : reg_increfs) {
for (int i = 0; i < pair.second; ++i) {
// If we see long strings of Increfs being inserted in real code by this,
// we should either figure out if we can optimize code like that better,
// or create a variant of Incref that adds more than 1 to the refcount.
insertIncref(env, pair.first, cursor);
}
}
}
// Bind guards to their dominating FrameState, and remove all Snapshot
// instructions since they're no longer needed. We run this before refcount
// insertion to ensure that Snapshots don't keep values alive longer than
// necessary.
void bindGuards(Function& irfunc) {
std::vector<Instr*> snapshots;
for (auto& block : irfunc.cfg.blocks) {
FrameState* fs = nullptr;
for (auto& instr : block) {
if (instr.IsSnapshot()) {
auto& snapshot = static_cast<const Snapshot&>(instr);
fs = snapshot.frameState();
snapshots.emplace_back(&instr);
} else if (
instr.IsGuard() || instr.IsGuardIs() || instr.IsGuardType() ||
instr.IsDeopt() || instr.IsDeoptPatchpoint()) {
JIT_DCHECK(
fs != nullptr,
"No dominating snapshot for '%s' in function:\n%s",
instr,
irfunc);
auto& guard = static_cast<DeoptBase&>(instr);
guard.setFrameState(*fs);
} else if (!instr.isReplayable()) {
fs = nullptr;
}
}
}
for (auto& snapshot : snapshots) {
snapshot->unlink();
delete snapshot;
}
DeadCodeElimination{}.Run(irfunc);
}
std::ostream& operator<<(std::ostream& os, const RegState& rstate) {
os << "RegState{[";
auto sep = "";
for (int i = 0, n = rstate.numCopies(); i < n; ++i) {
fmt::print(os, "{}{}", sep, rstate.copy(i)->name());
sep = ", ";
}
fmt::print(os, "], {}", rstate.kind());
if (rstate.isBorrowed() && !rstate.support().empty()) {
fmt::print(os, " {}", rstate.support().bits());
}
return os << "}";
}
std::ostream& operator<<(std::ostream& os, const StateMap& regs) {
if (regs.empty()) {
return os << "StateMap[0] = {}";
}
std::vector<const RegState*> states;
for (auto& pair : regs) {
states.emplace_back(&pair.second);
}
std::sort(states.begin(), states.end(), RegStateLess{});
fmt::print(os, "StateMap[{}] = {{\n", states.size());
for (auto state : states) {
fmt::print(os, " {} -> {}\n", state->model()->name(), *state);
}
return os << "}";
}
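// Coalesce runs of kMinimumNumberOfDecrefsToOptimize or more consecutive
// Decref instructions into a single BatchDecref that takes all of the
// decremented values as operands.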
void optimizeLongDecrefRuns(Function& irfunc) {
constexpr int kMinimumNumberOfDecrefsToOptimize = 4;
auto get_number_of_decrefs = [](auto block, auto cur_iter) {
int result = 0;
while (cur_iter != block->end()) {
if (!cur_iter->IsDecref()) {
break;
}
result++;
++cur_iter;
}
return result;
};
for (auto& block : irfunc.cfg.GetRPOTraversal()) {
auto cur_iter = block->begin();
while (cur_iter != block->end()) {
if (!cur_iter->IsDecref()) {
++cur_iter;
continue;
}
int num = get_number_of_decrefs(block, cur_iter);
if (num < kMinimumNumberOfDecrefsToOptimize) {
std::advance(cur_iter, num);
continue;
}
auto batch_decref = BatchDecref::create(num);
batch_decref->InsertBefore(*cur_iter);
constexpr size_t kDecrefOperandIndex = 0;
for (int i = 0; i < num; i++) {
JIT_CHECK(
cur_iter->IsDecref(),
"An unexpected non-decref instruction in a decref run.");
batch_decref->SetOperand(i, cur_iter->GetOperand(kDecrefOperandIndex));
auto old_instr = cur_iter++;
old_instr->unlink();
delete &(*old_instr);
}
}
}
}
} // namespace
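// The pass runs in two phases. The analysis phase walks the blocks in RPO
// with a worklist, reprocessing blocks until every out-state reaches a fixed
// point. The mutation phase then replays each block once with env.mutate
// set, inserting Increfs/Decrefs and filling in deopt and yield
// live-register information.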
void RefcountInsertion::Run(Function& func) {
PhiElimination{}.Run(func);
bindGuards(func);
func.cfg.splitCriticalEdges();
TRACE(
"Starting refcount insertion for '%s':\n%s",
func.fullname,
HIRPrinter(true).ToString(func));
Env env{func};
auto rpo_blocks = func.cfg.GetRPOTraversal();
Worklist<BasicBlock*> worklist;
for (auto block : rpo_blocks) {
worklist.push(block);
}
while (!worklist.empty()) {
auto block = worklist.front();
worklist.pop();
updateInState(env, block);
TRACE("\nAnalyzing bb %d with in-state:\n%s", block->id, env.live_regs);
for (auto& instr : *block) {
processInstr(env, instr);
}
TRACE("Finished bb %d with out-state:\n%s", block->id, env.live_regs);
auto& block_state = env.blocks[block];
if (env.live_regs != block_state.out) {
block_state.out = std::move(env.live_regs);
for (auto edge : block->out_edges()) {
worklist.push(edge->to());
}
}
}
TRACE("\nStarting mutation phase");
env.mutate = true;
for (auto block : rpo_blocks) {
// Remember first_instr here to skip any (Inc|Dec)Refs inserted by
// useSimpleInState().
auto& first_instr = block->front();
if (block->in_edges().size() <= 1) {
useSimpleInState(env, block);
} else {
useInState(env, map_get(env.blocks, block).in);
}
TRACE("\nEntering bb %d with state:\n%s", block->id, env.live_regs);
for (auto it = block->iterator_to(first_instr); it != block->end();) {
auto& instr = *it;
// Increment it before calling processInstr() to skip any Decrefs
// inserted after instr.
++it;
processInstr(env, instr);
}
TRACE("Leaving bb %d with state:\n%s", block->id, env.live_regs);
if (block->out_edges().size() == 1) {
exitBlock(env, *block->out_edges().begin());
}
}
// Clean up any trampoline blocks that weren't necessary.
// TODO(emacs): Investigate running the whole CleanCFG pass here or between
// every pass.
CleanCFG::RemoveTrampolineBlocks(&func.cfg);
// Optimize long decref runs
optimizeLongDecrefRuns(func);
}
} // namespace hir
} // namespace jit