/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/vasm.h"

#include "hphp/runtime/vm/jit/vasm-simplify-internal.h"

#include "hphp/runtime/vm/jit/abi.h"
#include "hphp/runtime/vm/jit/containers.h"
#include "hphp/runtime/vm/jit/vasm-gen.h"
#include "hphp/runtime/vm/jit/vasm-info.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-unit.h"
#include "hphp/runtime/vm/jit/vasm-util.h"
#include "hphp/runtime/vm/jit/vasm-visit.h"

#include "hphp/util/arch.h"
#include "hphp/util/asm-x64.h"

#include <utility>

TRACE_SET_MOD(vasm);

namespace HPHP::jit {

namespace {

///////////////////////////////////////////////////////////////////////////////

enum class ResValid : uint8_t {
  Empty,
  Valid,
  Invalid
};

struct GetMemOp {
  template<class T> void imm(T) {}
  template<class T> void def(T) {}
  template<class T> void use(T) {}
  void use(Vptr mem) {
    if (rv != ResValid::Empty) {
      rv = ResValid::Invalid;
    } else {
      mr = mem;
      rv = ResValid::Valid;
    }
  }
  template<class T> void across(T) {}
  template<class T, class H> void useHint(T, H) {}
  template<class T, class H> void defHint(T, H) {}
  template<Width w> void use(Vp<w> m) { use(static_cast<Vptr>(m)); }

  bool isValid() { return rv == ResValid::Valid; }

  Vptr mr;
  ResValid rv{ResValid::Empty};
};

/*
 * Return the Vptr and size for simple insts that read/write memory.
 *
 * Return a size of zero to indicate a non-simple inst, e.g. a call.
 */
std::pair<Vptr, uint8_t> getMemOpAndSize(const Vinstr& inst) {
  GetMemOp g;
  visitOperands(inst, g);

  auto const vop = g.mr;
  if (!g.isValid()) return {vop, 0};

  auto sz = width(inst.op);
  // Workaround: load and store opcodes report a width of Octa,
  // while in reality they are Quad.
  if (inst.op == Vinstr::load || inst.op == Vinstr::store) {
    sz = Width::Quad;
  }
  sz &= Width::AnyNF;

  auto const size = [&] {
    switch (sz) {
      case Width::Byte:
        return sz::byte;
      case Width::Word: case Width::WordN:
        return sz::word;
      case Width::Long: case Width::LongN:
        return sz::dword;
      case Width::Quad: case Width::QuadN:
        return sz::qword;
      case Width::Octa: case Width::AnyNF:
        return 16;
      default:
        return 0;
    }
  }();
  return {vop, size};
}

/*
 * Return true if we are sure that `inst1' does not write to the same memory
 * location that `inst2' reads or writes (and vice versa).
 *
 * (Note that a false return does /not/ make any positive indication about
 * aliasing.)
 */
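/*
 * For example (a sketch): store{r1, rbp[8]} and load{rbp[16], r2} are both
 * quad-sized accesses off the same base with no index, and 16 >= 8 + 8, so
 * the ranges are provably disjoint and we return true; store{r1, rbp[8]}
 * vs. load{rbp[12], r2} overlap, so we return false.
 */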
bool cannot_alias_write(const Vinstr& inst1, const Vinstr& inst2) {
  if (!writesMemory(inst1.op) && !writesMemory(inst2.op)) return true;
  auto const p1 = getMemOpAndSize(inst1);
  auto const p2 = getMemOpAndSize(inst2);
  if (p1.second == 0 || p2.second == 0) return false;
  auto const v1 = p1.first;
  auto const v2 = p2.first;
  auto const size1 = p1.second;
  auto const size2 = p2.second;
  if (v1.base != v2.base || v1.seg != v2.seg) return false;
  if ((v1.index != v2.index) ||
      (v1.index.isValid() && (v1.scale != v2.scale))) {
    return false;
  }
  if (v1.disp == v2.disp) return false;
  if (v1.disp < v2.disp) {
    return v2.disp >= v1.disp + size1;
  } else {
    return v1.disp >= v2.disp + size2;
  }
}

/*
 * Metadata about a potentially-foldable load Vinstr.
 */
struct FoldableLoadInfo {
  /*
   * Source memory location of the load.
   */
  const Vptr* operand;
  /*
   * Index into the instruction stream.
   *
   * Note that this makes FoldableLoadInfo block-context-sensitive.
   */
  size_t idx;
  /*
   * Whether we need to check for interfering writes to the same location.
   */
  bool check_writes;
};

/*
 * If `reg' is defined by a load in `b' with index j < i, and there are no
 * interfering stores between j and i, return a pointer to its Vptr argument.
 *
 * Note that during simplification, we can end up with mismatched register
 * sizes, so that a loadzbq feeds a byte-sized instruction. In that case, it's
 * ok to treat the loadzbq as if it were a loadb, provided we're expecting a
 * byte-sized operand. Since Vreg doesn't carry a register width around, we
 * pass it in via the `size' param, so that we can verify the zero-extend
 * variants.
 */
FoldableLoadInfo foldable_load_helper(Env& env, Vreg reg, int size,
                                      Vlabel b, size_t i, bool mw) {
  if (!i || env.use_counts[reg] != 1) return { nullptr, 0, false };

  auto const def_inst = env.def_insts[reg];
  switch (def_inst) {
    case Vinstr::loadb:
    case Vinstr::loadw:
    case Vinstr::loadl:
    case Vinstr::load:
      break;
    case Vinstr::loadzbq:
    case Vinstr::loadzbl:
      if (size > sz::byte) return { nullptr, 0, false };
      break;
    case Vinstr::loadzwq:
      if (size > sz::word) return { nullptr, 0, false };
      break;
    case Vinstr::loadzlq:
      if (size > sz::dword) return { nullptr, 0, false };
      break;
    case Vinstr::copy:
      break;
    default:
      return { nullptr, 0, false };
  }

#define CHECK_LOAD(load)                    \
  case Vinstr::load:                        \
    if (inst.load##_.d != reg) break;       \
    return { &inst.load##_.s, i, mw };

  while (i--) {
    auto const& inst = env.unit.blocks[b].code[i];
    if (inst.op == def_inst) {
      switch (def_inst) {
        CHECK_LOAD(loadb);
        CHECK_LOAD(loadw);
        CHECK_LOAD(loadl);
        CHECK_LOAD(load);
        CHECK_LOAD(loadzbq);
        CHECK_LOAD(loadzbl);
        CHECK_LOAD(loadzwq);
        CHECK_LOAD(loadzlq);
        case Vinstr::copy:
          if (inst.copy_.d != reg) break;
          return foldable_load_helper(env, inst.copy_.s, size, b, i, mw);
        default:
          not_reached();
      }
    }
    auto const op = env.unit.blocks[b].code[i].op;
    mw |= writesMemory(op);
  }

  return { nullptr, 0, false };
}

const Vptr* foldable_load(Env& env, Vreg reg, int size,
                          Vlabel b, size_t i) {
  auto const info = foldable_load_helper(env, reg, size, b, i, false);
  if (!info.operand || info.idx + 1 == i) return info.operand;

  std::vector<Vreg> nonSSARegs;
  visit(env.unit, *info.operand, [&] (Vreg r, Width) {
    if (r.isPhys()) nonSSARegs.push_back(r);
  });
  if (nonSSARegs.empty() && !info.check_writes) return info.operand;

  // The Vptr contains non-SSA regs, so we need to check that they're
  // not modified between the load and the use.
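  // For example (a sketch): in load{rsp[8], v1}; store{v2, rsp[8]};
  // testq{v1, v1, sf}, the intervening store may clobber the loaded
  // location, so we must not fold the load into the testq.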
  auto const loadInst = env.unit.blocks[b].code[info.idx];
  for (auto ix = info.idx; ++ix < i; ) {
    auto const& inst = env.unit.blocks[b].code[ix];
    if (isCall(inst)) return nullptr;
    bool ok = true;
    if (!nonSSARegs.empty()) {
      visitDefs(env.unit, inst, [&] (Vreg r, Width) {
        for (auto const t : nonSSARegs) {
          if (t == r) ok = false;
        }
      });
      if (!ok) return nullptr;
    }
    if (info.check_writes && writesMemory(inst.op) &&
        !cannot_alias_write(loadInst, inst)) {
      return nullptr;
    }
  }
  return info.operand;
}

const Vptr* foldable_load(Env& env, Vreg8 reg, Vlabel b, size_t i) {
  return foldable_load(env, reg, sz::byte, b, i);
}

const Vptr* foldable_load(Env& env, Vreg16 reg, Vlabel b, size_t i) {
  return foldable_load(env, reg, sz::word, b, i);
}

const Vptr* foldable_load(Env& env, Vreg32 reg, Vlabel b, size_t i) {
  return foldable_load(env, reg, sz::dword, b, i);
}

const Vptr* foldable_load(Env& env, Vreg64 reg, Vlabel b, size_t i) {
  return foldable_load(env, reg, sz::qword, b, i);
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Simplify an `inst' at block `b', instruction `i', returning whether or not
 * any changes were made.
 *
 * Specializations are below.
 */
template <typename Inst>
bool simplify(Env&, const Inst& /*inst*/, Vlabel /*b*/, size_t /*i*/) {
  return false;
}

template <typename Inst>
bool psimplify(Env&, const Inst& /*inst*/, Vlabel /*b*/, size_t /*i*/) {
  return false;
}

////////////////////////////////////////////////////////////////////////////////

/*
 * Narrow compares
 */

/*
 * Return a bound on the size of a Vreg.
 *
 * If it's known to fit in:
 *  - 8 unsigned bits, return sz::byte
 *  - 16 unsigned bits, return sz::word
 *  - 32 unsigned bits, return sz::dword
 *  - otherwise, return sz::qword.
 */
int value_width(Env& env, Vreg reg) {
  if (auto const c = env.consts[reg]) {
    if (!c->isUndef && c->kind != Vconst::Double) {
      if (c->val <= 0xff)       return sz::byte;
      if (c->val <= 0xffff)     return sz::word;
      if (c->val <= 0xffffffff) return sz::dword;
    }
    return sz::qword;
  }

  switch (env.def_insts[reg]) {
    case Vinstr::loadzbq:
    case Vinstr::loadzbl:
    case Vinstr::movzbw:
    case Vinstr::movzbl:
    case Vinstr::movzbq:
      return sz::byte;
    case Vinstr::movzwl:
    case Vinstr::movzwq:
    case Vinstr::loadzwq:
      return sz::word;
    case Vinstr::movzlq:
    case Vinstr::loadzlq:
      return sz::dword;
    default:
      break;
  }
  return sz::qword;
}

/*
 * Return a suitable register for `r', with width `size'.
 *
 *  - if r is constant, just return a constant.
 *  - if r is the result of a zero-extending move from the
 *    correct size, return the source of the move
 *  - if r is the result of a zero-extending load with one use,
 *    convert the load to a non-zero-extending form
 *  - otherwise we should logically apply a movtq<sz> to the register,
 *    and return the dst. We don't check widths after simplify, however,
 *    and inserting them can prevent other patterns from being recognized,
 *    so just leave them out.
 */
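/*
 * For example (a sketch): given movzbq{v1, v2} feeding cmpq{v2, v3, sf},
 * narrow_reg(env, sz::byte, v2, ...) returns v1 (nop-ing the movzbq when v2
 * has no other uses), so the caller can emit a cmpb against v1 instead.
 */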
Vreg narrow_reg(Env& env, int size, Vreg r, Vlabel b, size_t i) {
  if (auto const c = env.consts[r]) {
    assertx(!c->isUndef && c->kind != Vconst::Double);
    return r;
  }

  auto reg = [&] () -> Vreg {
    while (i--) {
      auto replace = [&] (const Vinstr& rep) {
        return simplify_impl(env, b, i, [&] (Vout& v) {
          v << rep;
          return 1;
        });
      };
      auto match = [&] (int rsz, size_t rn) {
        if (rsz != size) return Vreg{};
        if (env.use_counts[r] == 1) {
          replace(nop{});
        }
        return Vreg{ rn };
      };

      auto const& inst = env.unit.blocks[b].code[i];
      switch (inst.op) {
        case Vinstr::loadzbq: {
          auto const& load = inst.get<Vinstr::loadzbq>();
          if (load.d == r) {
            if (size == sz::byte && env.use_counts[r] == 1) {
              replace(loadb{load.s, r});
              return r;
            }
            return {};
          }
          break;
        }
        case Vinstr::loadzbl: {
          auto const& load = inst.get<Vinstr::loadzbl>();
          if (load.d == r) {
            if (size == sz::byte && env.use_counts[r] == 1) {
              replace(loadb{load.s, r});
              return r;
            }
            return {};
          }
          break;
        }
        case Vinstr::movzbw:
          if (inst.movzbw_.d == r) return match(sz::byte, inst.movzbw_.s);
          break;
        case Vinstr::movzbl:
          if (inst.movzbl_.d == r) return match(sz::byte, inst.movzbl_.s);
          break;
        case Vinstr::movzbq:
          if (inst.movzbq_.d == r) return match(sz::byte, inst.movzbq_.s);
          break;
        case Vinstr::movzwl:
          if (inst.movzwl_.d == r) return match(sz::word, inst.movzwl_.s);
          break;
        case Vinstr::movzwq:
          if (inst.movzwq_.d == r) return match(sz::word, inst.movzwq_.s);
          break;
        case Vinstr::loadzwq: {
          auto const& load = inst.get<Vinstr::loadzwq>();
          if (load.d == r) {
            if (size == sz::word && env.use_counts[r] == 1) {
              replace(loadw{load.s, r});
              return r;
            }
            return {};
          }
          break;
        }
        case Vinstr::movzlq:
          if (inst.movzlq_.d == r) return match(sz::dword, inst.movzlq_.s);
          break;
        case Vinstr::loadzlq: {
          auto const& load = inst.get<Vinstr::loadzlq>();
          if (load.d == r) {
            if (size == sz::dword && env.use_counts[r] == 1) {
              replace(loadl{load.s, r});
              return r;
            }
            return {};
          }
          break;
        }
        default:
          break;
      }
    }
    return {};
  }();

  return reg.isValid() ? reg : r;
}

template<typename instb, typename instw, typename instl, typename instq>
int narrow_inst(Env& env, int size, const instq& vinst, Vlabel b, size_t i,
                Vout& v) {
  auto const s0 = narrow_reg(env, size, vinst.s0, b, i);
  auto const s1 = narrow_reg(env, size, vinst.s1, b, i);
  switch (size) {
    case sz::byte:
      v << instb{s0, s1, vinst.sf};
      break;
    case sz::word:
      v << instw{s0, s1, vinst.sf};
      break;
    case sz::dword:
      v << instl{s0, s1, vinst.sf};
      break;
    default:
      always_assert(false);
  }
  return 1;
}

enum class SFUsage {
  Ok,
  Fixable,
  Unfixable
};

/*
 * Check whether all uses of sf are suitable, or can be converted to
 * suitable uses, as determined by fun.
 *
 * fun takes a CC as its argument, and returns:
 *  - CC_None to indicate the usage is unsuitable
 *  - The input cc to indicate that it's already suitable
 *  - Some other cc to indicate that we could convert to that.
 *
 * If fix is true, this will convert any uses as specified.
 */
template<class F>
SFUsage check_sf_usage_helper(Env& env, Vreg sf, Vlabel b, size_t i, bool fix,
                              F fun) {
  auto ret = SFUsage::Ok;
  auto uses = env.use_counts[sf];
  while (uses) {
    auto const& code = env.unit.blocks[b].code;
    if (++i == code.size()) return SFUsage::Unfixable;
    if (getSFUseReg(code[i]) == sf) {
      uses--;
      auto tmp = code[i];
      auto& cc = getConditionCode(tmp);
      auto const newCC = fun(cc);
      if (newCC == CC_None) {
        return SFUsage::Unfixable;
      }
      if (newCC != cc) {
        if (fix) {
          cc = newCC;
          simplify_impl(env, b, i, [&] (Vout& v) {
            v << tmp;
            return 1;
          });
        }
        ret = SFUsage::Fixable;
      }
    }
  }
  return ret;
}

template<class F>
bool check_sf_usage(Env& env, Vreg sf, Vlabel b, size_t i, F fun) {
  switch (check_sf_usage_helper(env, sf, b, i, false, fun)) {
    case SFUsage::Ok:
      return true;
    case SFUsage::Fixable:
      check_sf_usage_helper(env, sf, b, i, true, fun);
      return true;
    case SFUsage::Unfixable:
      return false;
  }
  not_reached();
}

// Try to change the condition codes so they work when the inputs to
// the compare are swapped. Return true if successful (or there was
// nothing to do).
bool flip_ordered_uses(Env& env, Vreg sf, Vlabel b, size_t i) {
  return check_sf_usage(
    env, sf, b, i,
    [] (ConditionCode cc) {
      switch (cc) {
        case CC_None:
          always_assert(false);
        case CC_O:
        case CC_NO:
          // can't be fixed
          return CC_None;
        case CC_B:  return CC_A;
        case CC_AE: return CC_BE;
        case CC_BE: return CC_AE;
        case CC_A:  return CC_B;
        case CC_E:
        case CC_NE:
          // already unordered
          return cc;
        case CC_S:
        case CC_NS:
        case CC_P:
        case CC_NP:
          // can't be fixed
          return CC_None;
        case CC_L:  return CC_G;
        case CC_GE: return CC_LE;
        case CC_LE: return CC_GE;
        case CC_G:  return CC_L;
      }
      not_reached();
    }
  );
}

// Change any signed conditions to the corresponding unsigned
// condition. Return true if successful (or there was nothing to do).
bool fix_signed_uses(Env& env, Vreg sf, Vlabel b, size_t i) {
  return check_sf_usage(
    env, sf, b, i,
    [] (ConditionCode cc) {
      switch (cc) {
        case CC_None:
          always_assert(false);
        case CC_O:
        case CC_NO:
          // can't be fixed
          return CC_None;
        case CC_B:
        case CC_AE:
        case CC_E:
        case CC_NE:
        case CC_BE:
        case CC_A:
          // already unsigned
          return cc;
        case CC_S:
        case CC_NS:
        case CC_P:
        case CC_NP:
          // can't be fixed
          return CC_None;
        case CC_L:  return CC_B;
        case CC_GE: return CC_AE;
        case CC_LE: return CC_BE;
        case CC_G:  return CC_A;
      }
      not_reached();
    }
  );
}

/*
 * Return a (Vptr*, idx in `b') handle to the first store of `reg' within
 * block `b' after position `i'.
 *
 * Returns { nullptr, 0 } if no such store is found, or if that store is not
 * the only use of `reg'.
 */
std::pair<const Vptr*, size_t> storeq(Env& env, Vreg64 reg,
                                      Vlabel b, size_t i) {
  if (env.use_counts[reg] != 1) return { nullptr, 0 };

  while (++i < env.unit.blocks[b].code.size()) {
    auto const& inst = env.unit.blocks[b].code[i];
    if (inst.op == Vinstr::store && inst.store_.s == reg) {
      return { &(inst.store_.d), i };
    }
  }
  return { nullptr, 0 };
}

bool is_reg_const(Env& env, Vreg reg) {
  return env.consts[reg].has_value();
}

template <typename Op>
bool flip_operands_helper(Env& env, const Op& op, Vlabel b, size_t i) {
  // We want any constant register to be in the first operand, and any
  // foldable load to be in the second.
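  // For example (a sketch): cmpq{v1, v2, sf} where v2 holds a constant
  // becomes cmpq{v2, v1, sf}, with every consumer of sf rewritten to the
  // swapped condition (e.g. CC_L becomes CC_G).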
  if (!(is_reg_const(env, op.s0) || foldable_load(env, op.s1, b, i)) &&
       (is_reg_const(env, op.s1) || foldable_load(env, op.s0, b, i))) {
    if (flip_ordered_uses(env, op.sf, b, i)) {
      return simplify_impl(env, b, i, Op { op.s1, op.s0, op.sf });
    }
  }
  return false;
}

bool simplify(Env& env, const addq& vadd, Vlabel b, size_t i) {
  if (arch_any(Arch::ARM)) return false;

  auto const stPair = storeq(env, vadd.d, b, i);
  auto const vptrs = stPair.first;
  auto const stIdx = stPair.second;

  if (vptrs) {
    auto const vptrl = foldable_load(env, vadd.s0, b, stIdx);
    if (vptrl && (*vptrl == *vptrs)) {
      bool ret = simplify_impl(env, b, i,
                               addqrm { vadd.s1, *vptrs, vadd.sf });
      // Need to do it here because a store will not be removed as dead code.
      if (ret) env.unit.blocks[b].code[stIdx] = nop{};
      return ret;
    }
    auto const vptrl2 = foldable_load(env, vadd.s1, b, stIdx);
    if (vptrl2 && (*vptrl2 == *vptrs)) {
      bool ret = simplify_impl(env, b, i,
                               addqrm { vadd.s0, *vptrs, vadd.sf });
      if (ret) env.unit.blocks[b].code[stIdx] = nop{};
      return ret;
    }
  }
  if (auto const vptr = foldable_load(env, vadd.s0, b, i)) {
    return simplify_impl(env, b, i, addqmr{*vptr, vadd.s1, vadd.d, vadd.sf});
  }
  if (auto const vptr = foldable_load(env, vadd.s1, b, i)) {
    return simplify_impl(env, b, i, addqmr{*vptr, vadd.s0, vadd.d, vadd.sf});
  }
  return false;
}

// Find two leas that add an immediate to a register, and fold them into a
// single operation. Given this lea, look for a subsequent lea within the
// same block whose base reg is this lea's dest reg, where that use is the
// only use of this lea's dest reg.
//
// First, try to simplify lea (%r1), %r2 into copy %r1, %r2.
bool simplify(Env& env, const lea& vlea, Vlabel b, size_t i) {
  if (vlea.s.disp == 0 && !vlea.s.index.isValid() &&
      arch() != Arch::ARM && vlea.s.base.isValid()) {
    simplify_impl(env, b, i, copy { vlea.s.base, vlea.d });
    return true;
  }
  if (!vlea.s.index.isValid() && !vlea.s.base.isValid()) {
    simplify_impl(env, b, i, copy { env.unit.makeConst(vlea.s.disp), vlea.d });
    return true;
  }

  auto xinst = env.unit.blocks[b].code[i];
  bool found_second = false;
  size_t x;
  int disp2;
  if (vlea.d.isPhys()) {
    bool found_interf = false;
    for (x = i + 1; x < env.unit.blocks[b].code.size(); ++x) {
      xinst = env.unit.blocks[b].code[x];
      if (xinst.op == Vinstr::lea && xinst.lea_.s.base == vlea.d &&
          xinst.lea_.d == vlea.d && !(xinst.lea_.s.index.isValid())) {
        found_second = true;
        disp2 = xinst.lea_.s.disp;
        break;
      } else {
        visitUses(env.unit, xinst, [&] (Vreg r) {
          if (r == vlea.d) found_interf = true;
        });
        if (found_interf) return false;
        visitDefs(env.unit, xinst, [&] (Vreg r) {
          if (r == vlea.d) found_interf = true;
        });
        if (found_interf) return false;
      }
    }
  } else {
    if (env.use_counts[vlea.d] == 1) {
      for (x = i + 1; x < env.unit.blocks[b].code.size(); ++x) {
        xinst = env.unit.blocks[b].code[x];
        if ((xinst.op == Vinstr::lea) && (xinst.lea_.s.base == vlea.d) &&
            !xinst.lea_.s.index.isValid()) {
          found_second = true;
          disp2 = xinst.lea_.s.disp;
          break;
        }
      }
    }
  }
  if (found_second &&
      deltaFits((int64_t)(vlea.s.disp) + (int64_t)(disp2), sz::dword)) {
    (void)simplify_impl(env, b, i, lea { vlea.s + disp2, (xinst).lea_.d });
    // Update uses and delete the second lea.
    (void)simplify_impl(env, b, x, [&] (Vout& v) { return 1; });
    return true;
  }
  return false;
}

// Remove compares with unused results. This overlaps with removeDeadCode,
// but does it earlier.
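// For example (a sketch): a cmpq{v1, v2, sf} whose sf has a use count of
// zero sets flags that nobody reads, so it can be replaced with nop{}.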
template <typename Cmp>
bool simplify_dead_cmp(Env& env, const Cmp& cmp, Vlabel b, size_t i) {
  if (env.use_counts[cmp.sf] == 0) {
    return simplify_impl(env, b, i, nop{});
  }
  return false;
}

bool simplify(Env& env, const cmpq& vcmp, Vlabel b, size_t i) {
  if (simplify_dead_cmp(env, vcmp, b, i)) return true;
  if (flip_operands_helper(env, vcmp, b, i)) return true;

  if (!arch_any(Arch::ARM)) {
    if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
      return simplify_impl(env, b, i, cmpqm { vcmp.s0, *vptr, vcmp.sf });
    }
    if (auto const vptr = foldable_load(env, vcmp.s0, b, i)) {
      if (flip_ordered_uses(env, vcmp.sf, b, i)) {
        return simplify_impl(env, b, i, cmpqm { vcmp.s1, *vptr, vcmp.sf });
      }
    }
  }

  auto const sz0 = value_width(env, vcmp.s0);
  if (sz0 == sz::qword) return false;
  auto const sz1 = value_width(env, vcmp.s1);
  if (sz1 == sz::qword) return false;

  auto const sz = sz1 > sz0 ? sz1 : sz0;

  if (!fix_signed_uses(env, vcmp.sf, b, i)) return false;

  return simplify_impl(env, b, i, [&] (Vout& v) {
    return narrow_inst<cmpb, cmpw, cmpl>(env, sz, vcmp, b, i, v);
  });
}

bool simplify(Env& env, const cmpl& vcmp, Vlabel b, size_t i) {
  if (simplify_dead_cmp(env, vcmp, b, i)) return true;
  if (flip_operands_helper(env, vcmp, b, i)) return true;
  if (arch_any(Arch::ARM)) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i, cmplm { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpw& vcmp, Vlabel b, size_t i) {
  if (simplify_dead_cmp(env, vcmp, b, i)) return true;
  if (flip_operands_helper(env, vcmp, b, i)) return true;
  if (arch_any(Arch::ARM)) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i, cmpwm { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpb& vcmp, Vlabel b, size_t i) {
  if (flip_operands_helper(env, vcmp, b, i)) return true;
  if (simplify_dead_cmp(env, vcmp, b, i)) return true;
  if (arch_any(Arch::ARM)) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i, cmpbm { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpqi& vcmp, Vlabel b, size_t i) {
  if (simplify_dead_cmp(env, vcmp, b, i)) return true;
  if (arch_any(Arch::ARM)) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i, cmpqim { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpli& vcmp, Vlabel b, size_t i) {
  if (simplify_dead_cmp(env, vcmp, b, i)) return true;
  if (arch_any(Arch::ARM)) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i, cmplim { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpwi& vcmp, Vlabel b, size_t i) {
  if (simplify_dead_cmp(env, vcmp, b, i)) return true;
  if (arch_any(Arch::ARM)) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i, cmpwim { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpbi& vcmp, Vlabel b, size_t i) {
  if (simplify_dead_cmp(env, vcmp, b, i)) return true;
  if (arch_any(Arch::ARM)) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i, cmpbim { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpbim& vcmp, Vlabel b, size_t i) {
  return simplify_dead_cmp(env, vcmp, b, i);
}

bool simplify(Env& env, const cmpwim& vcmp, Vlabel b, size_t i) {
  return simplify_dead_cmp(env, vcmp, b, i);
}
bool simplify(Env& env, const cmplim& vcmp, Vlabel b, size_t i) {
  return simplify_dead_cmp(env, vcmp, b, i);
}

bool simplify(Env& env, const cmpqim& vcmp, Vlabel b, size_t i) {
  return simplify_dead_cmp(env, vcmp, b, i);
}

bool simplify(Env& env, const cmpbm& vcmp, Vlabel b, size_t i) {
  return simplify_dead_cmp(env, vcmp, b, i);
}

bool simplify(Env& env, const cmpwm& vcmp, Vlabel b, size_t i) {
  return simplify_dead_cmp(env, vcmp, b, i);
}

bool simplify(Env& env, const cmplm& vcmp, Vlabel b, size_t i) {
  return simplify_dead_cmp(env, vcmp, b, i);
}

bool simplify(Env& env, const cmpqm& vcmp, Vlabel b, size_t i) {
  return simplify_dead_cmp(env, vcmp, b, i);
}

// test sets C = O = 0; shift sets them in unknown ways.
// Z, S, and P are set the same way by both.
bool fix_shift_test_flags(Env& env, Vreg sf, Vlabel b, size_t i) {
  return check_sf_usage(
    env, sf, b, i,
    [] (ConditionCode cc) {
      switch (cc) {
        case CC_None:
          always_assert(false);
        case CC_E:
        case CC_NE:
        case CC_S:
        case CC_NS:
          // only test Z and S, no change
          return cc;
        case CC_A:
          // C=0 && Z=0, but C is always zero
          return CC_NE;
        case CC_BE:
          // C=1 || Z=1, but C is always zero
          return CC_E;
        case CC_L:
          // S != OF, but OF is always zero
          return CC_S;
        case CC_GE:
          // S == OF, but OF is always zero
          return CC_NS;
        case CC_O:
        case CC_NO:
        case CC_P:
        case CC_NP:
        case CC_LE:
        case CC_G:
        case CC_AE:
        case CC_B:
          // can't be fixed
          return CC_None;
      }
      not_reached();
    }
  );
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Test/And
 */

/*
 * If inst (assumed to be an instruction that writes an output
 * register d, and the status flags sf) is followed by a test
 * instruction, see if we can drop the test and just use inst's sf
 * flags.
 *
 * Depending on inst, we may have to rewrite the uses of sf (see
 * fix_shift_test_flags), and for some uses we may have to give
 * up. The caller provides fun to make these decisions.
 */
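/*
 * For example (a sketch): shrqi{3, s, d, sf1}; testq{d, d, sf2} can become
 * just shrqi{3, s, d, sf2}, provided every consumer of sf2 reads only flags
 * that the shift sets compatibly (see fix_shift_test_flags above).
 */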
template<Vinstr::Opcode test, typename Inst, typename FlagsFunc>
bool simplifyInstTest(Env& env, const Inst& inst, Vlabel b, size_t i,
                      FlagsFunc fun) {
  if (env.use_counts[inst.sf]) return false;

  return if_inst<test>(
    env, b, i + 1,
    [&] (const op_type<test>& vtest) {
      if (inst.d != vtest.s0 ||
          inst.d != vtest.s1 ||
          !fun(vtest.sf)) {
        return false;
      }

      return simplify_impl(
        env, b, i,
        [&] (Vout& v) {
          auto repl = inst;
          repl.sf = vtest.sf;
          v << repl;
          return 2;
        }
      );
    }
  );
}

bool simplify(Env& env, const shrqi& vshr, Vlabel b, size_t i) {
  return simplifyInstTest<Vinstr::testq>(
    env, vshr, b, i,
    [&] (Vreg sf) { return fix_shift_test_flags(env, sf, b, i); }
  );
}

template<Vinstr::Opcode test, typename Test, typename And>
bool simplify_and(Env& env, const And& vand, Vlabel b, size_t i) {
  if (!env.use_counts[vand.d]) {
    return simplify_impl(env, b, i, Test{ vand.s0, vand.s1, vand.sf });
  }
  return simplifyInstTest<test>(env, vand, b, i, [] (Vreg) { return true; });
}

bool simplify(Env& env, const andq& vandq, Vlabel b, size_t i) {
  return simplify_and<Vinstr::testq, testq>(env, vandq, b, i);
}

bool simplify(Env& env, const andqi& vandqi, Vlabel b, size_t i) {
  return simplify_and<Vinstr::testq, testqi>(env, vandqi, b, i);
}

/*
 * Simplify masking values with -1 in andXi{}:
 *   andbi{0xff, s, d}       -> copy{s, d}
 *   andwi{0xffff, s, d}     -> copy{s, d}
 *   andli{0xffffffff, s, d} -> copy{s, d}
 */
template<Vinstr::Opcode test, typename testi, typename andi>
bool simplify_andi(Env& env, const andi& inst, Vlabel b, size_t i) {
  if (inst.s0.l() == -1 && env.use_counts[inst.sf] == 0) {
    return simplify_impl(env, b, i, copy{ inst.s1, inst.d });
  }
  return simplify_and<test, testi>(env, inst, b, i);
}

bool simplify(Env& env, const andbi& andbi, Vlabel b, size_t i) {
  return simplify_andi<Vinstr::testb, testbi>(env, andbi, b, i);
}

bool simplify(Env& env, const andwi& andwi, Vlabel b, size_t i) {
  return simplify_andi<Vinstr::testw, testwi>(env, andwi, b, i);
}

bool simplify(Env& env, const andli& andli, Vlabel b, size_t i) {
  return simplify_andi<Vinstr::testl, testli>(env, andli, b, i);
}

template<typename Out, typename Long, typename In>
bool simplify_signed_test(Env& env, const In& test, uint32_t val,
                          Vlabel b, size_t i) {
  if (val == 0x80000000 &&
      check_sf_usage(
        env, test.sf, b, i,
        [] (ConditionCode cc) {
          switch (cc) {
            case CC_None:
              always_assert(false);
            case CC_E:  return CC_NS;
            case CC_NE: return CC_S;
            case CC_S:
            case CC_NS:
              return cc;
            case CC_A:
            case CC_BE:
            case CC_L:
            case CC_GE:
            case CC_O:
            case CC_NO:
            case CC_P:
            case CC_NP:
            case CC_LE:
            case CC_G:
            case CC_AE:
            case CC_B:
              // can't be fixed
              return CC_None;
          }
          not_reached();
        }
      ) &&
      simplify_impl(env, b, i, Out { test.s1, test.s1, test.sf })) {
    // This looks like it belongs in shrqi itself. The problem with
    // that is that it relies on the test optimizations already having
    // been done when we reach the shrqi. We could solve that by
    // iterating the simplify pass, but this should be cheaper, and
    // almost as good.
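    // For example (a sketch): shrqi{32, r0, r1, sf0};
    // testli{0x80000000, r1, sf} first becomes testl{r1, r1, sf} here, and
    // then, on spotting the preceding shrqi{32, ...}, testq{r0, r0, sf}.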
    if_inst<Vinstr::shrqi>(
      env, b, i - 1,
      [&] (const shrqi& vshr) {
        if (vshr.s0.l() != 32) return false;
        return simplify_impl(
          env, b, i - 1,
          [&] (Vout& v) {
            v << Long{ vshr.s1, vshr.s1, test.sf };
            return 2;
          }
        );
      }
    );
    return true;
  }
  return false;
}

template<typename testm, typename test>
bool simplify_testi(Env& env, const test& vtest, Vlabel b, size_t i) {
  if (arch_any(Arch::ARM)) return false;
  if (auto const vptr = foldable_load(env, vtest.s1, b, i)) {
    return simplify_impl(env, b, i, testm { vtest.s0, *vptr, vtest.sf });
  }
  return false;
}

template<typename testm, typename cmpm, typename test>
bool simplify_test(Env& env, const test& vtest, Vlabel b, size_t i) {
  if (arch_any(Arch::ARM)) return false;
  if (vtest.s0 == vtest.s1 && env.use_counts[vtest.s0] == 2) {
    env.use_counts[vtest.s0]--;
    auto const vptr = foldable_load(env, vtest.s0, b, i);
    env.use_counts[vtest.s0]++;
    if (vptr) {
      return simplify_impl(env, b, i, cmpm { 0, *vptr, vtest.sf });
    }
  }
  if (simplify_testi<testm>(env, vtest, b, i)) return true;
  if (auto const vptr = foldable_load(env, vtest.s0, b, i)) {
    return simplify_impl(env, b, i, testm { vtest.s1, *vptr, vtest.sf });
  }
  return false;
}

bool simplify(Env& env, const testqi& test, Vlabel b, size_t i) {
  return simplify_testi<testqim>(env, test, b, i);
}

bool simplify(Env& env, const testli& test, Vlabel b, size_t i) {
  if (simplify_testi<testlim>(env, test, b, i)) return true;
  return simplify_signed_test<testl, testq>(env, test, test.s0.l(), b, i);
}

bool simplify(Env& env, const testwi& test, Vlabel b, size_t i) {
  return simplify_testi<testwim>(env, test, b, i);
}

bool simplify(Env& env, const testbi& test, Vlabel b, size_t i) {
  return simplify_testi<testbim>(env, test, b, i);
}

template<int size, typename testim>
bool shrink_test_immediate(Env& env, uint64_t v, Vptr ptr, Vreg sf,
                           Vlabel b, size_t i) {
  if (!v) return simplify_impl(env, b, i, testbi{ 0, rarg(0), sf });

  auto getNewVal = [&] (uint64_t val, int bits, bool top_bits,
                        Vreg sf) -> Optional<int> {
    auto const mask = (1LL << bits) - 1;
    auto const low = mask >> 1;
    val &= mask;
    if (val <= low) return val;

    // If we're not looking at the top bit of the original result, and
    // the top bit of the reduced mask is set, we'll have to give up
    // if anyone cares about the sign bit.
    if (!top_bits &&
        !check_sf_usage(
          env, sf, b, i,
          [] (ConditionCode cc) {
            switch (cc) {
              case CC_None:
                always_assert(false);
              case CC_E:
              case CC_NE:
                return cc;
              case CC_S:
              case CC_NS:
              case CC_A:
              case CC_BE:
              case CC_L:
              case CC_GE:
              case CC_O:
              case CC_NO:
              case CC_P:
              case CC_NP:
              case CC_LE:
              case CC_G:
              case CC_AE:
              case CC_B:
                // can't be fixed
                return CC_None;
            }
            not_reached();
          })) {
      return std::nullopt;
    }
    return (val & low) - (mask - low);
  };

  if (size == 1) return false;

  if (size == 2) {
    if (!(v & 0xff)) {
      auto const newVal = getNewVal(v >> 8, 8, true, sf);
      return newVal &&
        simplify_impl(env, b, i, testbim{ *newVal, ptr + 1, sf });
    }
    if (!(v & 0xff00)) {
      auto const newVal = getNewVal(v, 8, false, sf);
      return newVal &&
        simplify_impl(env, b, i, testbim{ *newVal, ptr, sf });
    }
  }

  if (size == 4) {
    if (!(v & 0xffff)) {
      auto const newVal = getNewVal(v >> 16, 16, true, sf);
      return newVal &&
        simplify_impl(env, b, i, testwim{ *newVal, ptr + 2, sf });
    }
    if (!(v & 0xffff0000)) {
      auto const newVal = getNewVal(v, 16, false, sf);
      return newVal &&
        simplify_impl(env, b, i, testwim{ *newVal, ptr, sf });
    }
  }

  if (size == 8) {
    if (!(v & 0xffffffff)) {
      auto const newVal = getNewVal(v >> 32, 32, true, sf);
      return newVal &&
        simplify_impl(env, b, i, testlim{ *newVal, ptr + 4, sf });
    }
    if (!(v & 0xffffffff00000000)) {
      auto const newVal = getNewVal(v, 32, false, sf);
      return newVal &&
        simplify_impl(env, b, i, testlim{ *newVal, ptr, sf });
    }
  }
  return false;
}

template<int size, typename testim>
bool shrink_testim(Env& env, const testim& test, Vlabel b, size_t i) {
  return shrink_test_immediate<size, testim>(
    env, test.s0.q(), test.s1, test.sf, b, i
  );
}

bool simplify(Env& env, const testqim& test, Vlabel b, size_t i) {
  return shrink_testim<sz::qword>(env, test, b, i);
}

bool simplify(Env& env, const testlim& test, Vlabel b, size_t i) {
  return shrink_testim<sz::dword>(env, test, b, i);
}

bool simplify(Env& env, const testwim& test, Vlabel b, size_t i) {
  return shrink_testim<sz::word>(env, test, b, i);
}

bool simplify(Env& env, const testbim& test, Vlabel b, size_t i) {
  return shrink_testim<sz::byte>(env, test, b, i);
}

bool simplify(Env& env, const testq& test, Vlabel b, size_t i) {
  auto const sz0 = value_width(env, test.s0);
  auto const sz1 = value_width(env, test.s1);

  auto const size = sz1 < sz0 ? sz1 : sz0;
  if (size < sz::qword) {
    return simplify_impl(env, b, i, [&] (Vout& v) {
      return narrow_inst<testb, testw, testl>(env, size, test, b, i, v);
    });
  }
  return simplify_test<testqm, cmpqim>(env, test, b, i);
}

bool simplify(Env& env, const testl& test, Vlabel b, size_t i) {
  return simplify_test<testlm, cmplim>(env, test, b, i);
}

bool simplify(Env& env, const testw& test, Vlabel b, size_t i) {
  return simplify_test<testwm, cmpwim>(env, test, b, i);
}

bool simplify(Env& env, const testb& test, Vlabel b, size_t i) {
  return simplify_test<testbm, cmpbim>(env, test, b, i);
}

////////////////////////////////////////////////////////////////////////////////

/*
 * Conditional operations.
 */
bool simplify(Env& env, const setcc& vsetcc, Vlabel b, size_t i) {
  return if_inst<Vinstr::xorbi>(env, b, i + 1, [&] (const xorbi& vxorbi) {
    // setcc{cc, _, tmp}; xorbi{1, tmp, d, _}; -> setcc{~cc, _, d};
    if (!(env.use_counts[vsetcc.d] == 1 &&
          vxorbi.s0.b() == 1 &&
          vxorbi.s1 == vsetcc.d &&
          env.use_counts[vxorbi.sf] == 0)) return false;

    return simplify_impl(env, b, i, [&] (Vout& v) {
      v << setcc{ccNegate(vsetcc.cc), vsetcc.sf, vxorbi.d};
      return 2;
    });
  });
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Or with constant values
 */

namespace {

template <typename Or>
bool implOrSimplify(Env& env, const Or& inst, Vlabel b, size_t i,
                    size_t immed) {
  if (env.use_counts[inst.sf] != 0) return false;
  if (immed == 0) return simplify_impl(env, b, i, copy{inst.s1, inst.d});

  auto const c = env.consts[inst.s1];
  if (!c || c->isUndef) return false;
  return simplify_impl(
    env, b, i,
    [&] (Vout& v) {
      auto const s = v.cns(immed | c->val);
      v << copy{s, inst.d};
      return 1;
    }
  );
}

} // namespace

bool simplify(Env& env, const orwi& inst, Vlabel b, size_t i) {
  auto const immed = inst.s0.w();
  return implOrSimplify(env, inst, b, i, immed);
}

bool simplify(Env& env, const orli& inst, Vlabel b, size_t i) {
  auto const immed = inst.s0.l();
  return implOrSimplify(env, inst, b, i, immed);
}

bool simplify(Env& env, const orqi& inst, Vlabel b, size_t i) {
  auto const immed = inst.s0.q();
  return implOrSimplify(env, inst, b, i, immed);
}

bool simplify(Env& env, const orq& inst, Vlabel b, size_t i) {
  if (env.use_counts[inst.sf] != 0) return false;

  auto const c0 = env.consts[inst.s0];
  auto const c1 = env.consts[inst.s1];
  if (c0 && !c0->isUndef) {
    if (c1 && !c1->isUndef) {
      return simplify_impl(env, b, i, [&] (Vout& v) {
        auto s = v.cns(c0->val | c1->val);
        v << copy{s, inst.d};
        return 1;
      });
    }
    if (c0->val == 0) {
      return simplify_impl(env, b, i, copy{inst.s1, inst.d});
    }
  } else if (c1 && !c1->isUndef) {
    if (c1->val == 0) {
      return simplify_impl(env, b, i, copy{inst.s0, inst.d});
    }
  }
  return false;
}

bool simplify(Env& env, const orqim& inst, Vlabel b, size_t i) {
  if (inst.s0.q() == 0 && env.use_counts[inst.sf] == 0) {
    return simplify_impl(env, b, i, nop{});
  }
  return false;
}

/*
 * Fold a cmov of a certain width into a copy if both values are the same
 * register or have the same known constant value.
 */
template <typename Inst>
bool cmov_fold_impl(Env& env, const Inst& inst, Vlabel b, size_t i) {
  auto const equivalent = [&]{
    if (inst.t == inst.f) return true;

    auto const ct = env.consts[inst.t];
    auto const cf = env.consts[inst.f];
    if (!ct || !cf) return false;
    return *ct == *cf;
  }();
  if (!equivalent) return false;

  return simplify_impl(
    env, b, i,
    [&] (Vout& v) {
      v << copy{inst.t, inst.d};
      return 1;
    }
  );
}

/*
 * Turn a cmov of a certain width into a matching setcc instruction if the
 * conditions are correct (both sources are constants of value 0 or 1).
 */
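/*
 * For example (a sketch): cmovq{cc, sf, f, t, d} with t = 1 and f = 0 is
 * setcc{cc, sf, tmp} zero-extended into d; with t = 0 and f = 1 it is the
 * negated setcc{!cc, sf, tmp}.
 */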
template <typename Inst, typename Extend>
bool cmov_setcc_impl(Env& env, const Inst& inst, Vlabel b,
                     size_t i, Extend extend) {
  auto const ct = env.consts[inst.t];
  auto const cf = env.consts[inst.f];
  if (!ct || !cf) return false;

  auto const check_const = [](Vconst c, bool& val) {
    if (c.isUndef) return false;
    switch (c.kind) {
      case Vconst::Quad:
      case Vconst::Long:
      case Vconst::Byte:
        if (c.val == 0) {
          val = false;
          return true;
        } else if (c.val == 1) {
          val = true;
          return true;
        } else {
          return false;
        }
      case Vconst::Double:
        return false;
    }
    not_reached();
  };

  bool t_val;
  if (!check_const(*ct, t_val)) return false;
  bool f_val;
  if (!check_const(*cf, f_val)) return false;

  return simplify_impl(env, b, i, [&] (Vout& v) {
    auto const d = env.unit.makeReg();
    if (t_val == f_val) {
      v << copy{env.unit.makeConst(t_val), d};
    } else if (t_val) {
      v << setcc{inst.cc, inst.sf, d};
    } else {
      v << setcc{ccNegate(inst.cc), inst.sf, d};
    }
    extend(v, d, inst.d);
    return 1;
  });
}

bool simplify(Env& env, const cmovb& inst, Vlabel b, size_t i) {
  if (cmov_fold_impl(env, inst, b, i)) return true;
  return cmov_setcc_impl(
    env, inst, b, i,
    [](Vout& v, Vreg8 src, Vreg dest) { v << copy{src, dest}; }
  );
}

bool simplify(Env& env, const cmovw& inst, Vlabel b, size_t i) {
  if (cmov_fold_impl(env, inst, b, i)) return true;
  return cmov_setcc_impl(
    env, inst, b, i,
    [](Vout& v, Vreg8 src, Vreg dest) { v << movzbw{src, dest}; }
  );
}

bool simplify(Env& env, const cmovl& inst, Vlabel b, size_t i) {
  if (cmov_fold_impl(env, inst, b, i)) return true;
  return cmov_setcc_impl(
    env, inst, b, i,
    [](Vout& v, Vreg8 src, Vreg dest) { v << movzbl{src, dest}; }
  );
}

bool simplify(Env& env, const cmovq& inst, Vlabel b, size_t i) {
  if (cmov_fold_impl(env, inst, b, i)) return true;
  return cmov_setcc_impl(
    env, inst, b, i,
    [](Vout& v, Vreg8 src, Vreg dest) { v << movzbq{src, dest}; }
  );
}

////////////////////////////////////////////////////////////////////////////////

/*
 * Copies, loads, and stores.
 */
/*
 * Simplify load followed by truncation:
 *   load{s, tmp}; movtqb{tmp, d}    -> loadtqb{s, d}
 *   load{s, tmp}; movtql{tmp, d}    -> loadtql{s, d}
 *   loadzbq{s, tmp}; movtqb{tmp, d} -> loadb{s, d}
 *   loadzwq{s, tmp}; movtqw{tmp, d} -> loadw{s, d}
 *   loadzlq{s, tmp}; movtql{tmp, d} -> loadl{s, d}
 */
template<Vinstr::Opcode mov_op, typename loadt, typename load_in>
bool simplify_load_truncate(Env& env, const load_in& load, Vlabel b, size_t i) {
  if (env.use_counts[load.d] != 1) return false;
  auto const& code = env.unit.blocks[b].code;
  if (i + 1 >= code.size()) return false;

  return if_inst<mov_op>(env, b, i + 1, [&] (const op_type<mov_op>& mov) {
    if (load.d != mov.s) return false;

    return simplify_impl(env, b, i, [&] (Vout& v) {
      v << loadt{load.s, mov.d};
      return 2;
    });
  });
}

bool simplify(Env& env, const load& load, Vlabel b, size_t i) {
  return
    simplify_load_truncate<Vinstr::movtqb, loadtqb>(env, load, b, i) ||
    simplify_load_truncate<Vinstr::movtql, loadtql>(env, load, b, i);
}

bool simplify(Env& env, const loadzbq& load, Vlabel b, size_t i) {
  return simplify_load_truncate<Vinstr::movtqb, loadb>(env, load, b, i);
}

bool simplify(Env& env, const loadzwq& load, Vlabel b, size_t i) {
  return simplify_load_truncate<Vinstr::movtqw, loadw>(env, load, b, i);
}

bool simplify(Env& env, const loadzlq& load, Vlabel b, size_t i) {
  return simplify_load_truncate<Vinstr::movtql, loadl>(env, load, b, i);
}

bool simplify(Env& env, const movzlq& inst, Vlabel b, size_t i) {
  auto const def_op = env.def_insts[inst.s];

  // Check if `inst.s' was defined by an instruction with Vreg32 operands, or
  // movzbl{} in particular (which lowers to a movl{}).
  if (width(def_op) != Width::Long &&
      def_op != Vinstr::movzbl) {
    return false;
  }

  // If so, the movzlq{} is redundant---instructions on 32-bit registers on
  // x64 always zero the upper bits.
  return simplify_impl(env, b, i, [&] (Vout& v) {
    v << copy{inst.s, inst.d};
    return 1;
  });
}

struct regWidth {
  explicit regWidth(Vreg r) : givenReg{r} {}
  template<class T> void imm(T) {}
  template<class T> void def(T) {}
  template<class T> void use(T) {}
  void use(Vptr ptr) {
    if (givenReg == ptr.base) {
      has_vptr = true;
      w = Width::Quad;
    }
    if (givenReg == ptr.index) {
      has_vptr = true;
      w = Width::Quad;
    }
  }
  void use(Vreg8 r)  { if (!has_vptr && r == givenReg) w = Width::Byte; }
  void use(Vreg16 r) { if (!has_vptr && r == givenReg) w = Width::Word; }
  void use(Vreg32 r) { if (!has_vptr && r == givenReg) w = Width::Long; }
  void use(Vreg64 r) { if (r == givenReg) w = Width::Quad; }
  void use(Vreg r)   { if (r == givenReg) w = Width::Quad; }
  void use(RegXMM r) { if (r == givenReg) w = Width::Octa; }
  template<class T> void across(T) {}
  template<class T, class H> void useHint(T s, H d) { use(s); }
  template<class T, class H> void defHint(T, H) {}
  template<Width w> void use(Vp<w>& m) { use(static_cast<Vptr>(m)); }

  Vreg givenReg;
  Width w{Width::None};
  bool has_vptr{false};
};

Width usedWidth(Vreg reg, Vinstr inst) {
  regWidth v{reg};
  visitOperands(inst, v);
  if (v.has_vptr) return Width::Quad;
  if (v.w != Width::None) return v.w;
  return Width::None;
}

// Change loadb into a zero-extending load if the dest reg is used as a wide
// reg, i.e. only if we can find a use of the reg at a wider width.
// When there is an assignment to a byte reg followed by a use of the full
// reg, the HW has to merge the new 8 bits with the old 24 / 56 bits, which
// is a penalty on all HW we use. The magnitude of the penalty is HW
// specific, and currently it is not huge. Most HW does it via an injection
// of a micro-operation.
// If there is no wide use, then a loadb is preferable to a loadzx because
// the encoding is smaller. So only do this if we can find the wide use.
// If there is another def after the loadb, then it eliminates the penalty,
// and therefore we avoid the transformation.
// Do not do this if there already is a wide def of this reg.
// Register allocation generates register copies and creates opportunities
// for this optimization.
bool psimplify(Env& env, const loadb& vldb, Vlabel b, size_t i) {
  bool found_wide_use = false;
  bool found_def = false;
  Vreg wide_reg;

  for (auto x = i + 1; x < env.unit.blocks[b].code.size(); ++x) {
    const auto xinst = env.unit.blocks[b].code[x];
    if (Vinstr::phpret == xinst.op) continue;
    visitDefs(env.unit, xinst, [&] (Vreg r) {
      if (r == vldb.d) {
        found_def = true;
        return;
      }
    });
    if (found_def) return false;

    visitUses(env.unit, xinst, [&] (Vreg r) {
      if (r == vldb.d) {
        Width w = usedWidth(r, xinst);
        if ((Width::Long == w) || (Width::Quad == w)) {
          wide_reg = r;
          found_wide_use = true;
        }
      }
    });
    if (found_wide_use) {
      auto const zinst = loadzbl { vldb.s, wide_reg };
      return vmodify(env.unit, b, i,
                     [&] (Vout& v) { v << zinst; return 1; });
    }
  }
  return false;
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Pushes and pops.
 */

bool simplify(Env& env, const pop& inst, Vlabel b, size_t i) {
  if (env.use_counts[inst.d]) return false;

  // Convert to an lea when popping to a reg without any uses.
  return simplify_impl(env, b, i, [&] (Vout& v) {
    v << lea{reg::rsp[8], reg::rsp};
    return 1;
  });
}

bool simplify(Env& env, const orlim& vorlim, Vlabel b, size_t i) {
  auto const orinst = env.unit.blocks[b].code[i];
  auto const orOperand = orinst.orlim_.s0.l();
  const Vptr32 orDest = orinst.orlim_.m;
  for (int x = i - 1; x >= 0; --x) {
    auto const xinst = env.unit.blocks[b].code[x];
    if (Vinstr::storeli == xinst.op) {
      const Vptr32 stDest = xinst.storeli_.m;
      if (stDest == orDest) {
        for (auto j = x + 1; j < i; ++j) {
          if (!cannot_alias_write(env.unit.blocks[b].code[j], xinst)) {
            return false;
          }
        }
        const auto stOperand = xinst.storeli_.s.l();
        auto const newOp = stOperand | orOperand;
        return simplify_impl(env, b, i, storeli { newOp, stDest });
      }
    } else if (Vinstr::storel == xinst.op) {
      const auto srcReg = xinst.storel_.s;
      if (auto const c = env.consts[srcReg]) {
        const Vptr32 stDest = xinst.storel_.m;
        if (orDest == stDest) {
          for (auto j = x + 1; j < i; ++j) {
            if (!cannot_alias_write(env.unit.blocks[b].code[j], xinst)) {
              return false;
            }
          }
          const auto stOperand = c->val;
          const int newOp = stOperand | orOperand;
          return simplify_impl(env, b, i, storeli { newOp, stDest });
        }
      }
    }
  }
  return false;
}

///////////////////////////////////////////////////////////////////////////////

// Try to do constant folding on Vptr operands.

struct SimplifyVptrVisit {
  explicit SimplifyVptrVisit(Env& env) : env{env} {}
  template<class T> void def(T) {}
  template<class T> void imm(const T&) {}
  template<class T> void across(T) {}
  template<class T, class H> void defHint(T, H) {}
  template<class T, class H> void useHint(T r, H) { use(r); }
  template <typename T> void use(T) {}

  void use(Vptr& ptr) {
    // If the base is a constant, we can fold it into the displacement (if it
    // fits).
    if (ptr.base.isValid()) {
      if (auto const c = env.consts[ptr.base]) {
        auto const newDisp = static_cast<int64_t>(ptr.disp) + c->val;
        if (deltaFits(newDisp, sz::dword)) {
          ptr.disp = newDisp;
          ptr.base = Vreg{};
          changed = true;
        }
      }
    }

    // If the index is a constant, we can fold it into the displacement (if
    // it fits).
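    // For example (a sketch): with an index holding the constant 3 and a
    // scale of 8, base[disp + 3 * 8] folds to base[disp + 24], with the
    // index register dropped and the scale reset to 1.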
    if (ptr.index.isValid()) {
      if (auto const c = env.consts[ptr.index]) {
        auto const newDisp =
          static_cast<int64_t>(ptr.disp) + c->val * ptr.scale;
        if (deltaFits(newDisp, sz::dword)) {
          ptr.disp = newDisp;
          ptr.index = Vreg{};
          ptr.scale = 1;
          changed = true;
        }
      }
    }
  }

  Env& env;
  bool changed = false;
};

bool simplify_vptr(Env& env, Vinstr& inst) {
  SimplifyVptrVisit v{env};
  visitOperands(inst, v);
  return v.changed;
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Perform peephole simplification at instruction `i' of block `b'.
 *
 * Return true if changes were made, else false.
 */
bool simplify(Env& env, Vlabel b, size_t i) {
  assertx(i <= env.unit.blocks[b].code.size());
  auto& inst = env.unit.blocks[b].code[i];

  auto const changed = simplify_vptr(env, inst);

  switch (inst.op) {
#define O(name, ...)                                          \
    case Vinstr::name:                                        \
      return simplify(env, inst.name##_, b, i) || changed;
    VASM_OPCODES
#undef O
  }
  not_reached();
}

bool psimplify(Env& env, Vlabel b, size_t i) {
  assertx(i <= env.unit.blocks[b].code.size());
  auto& inst = env.unit.blocks[b].code[i];

  switch (inst.op) {
#define O(name, ...)                                          \
    case Vinstr::name:                                        \
      return psimplify(env, inst.name##_, b, i);
    VASM_OPCODES
#undef O
  }
  not_reached();
}

/*
 * Perform architecture-specific peephole simplification.
 */
bool simplify_arch(Env& env, Vlabel b, size_t i) {
  return ARCH_SWITCH_CALL(simplify, env, b, i);
}

///////////////////////////////////////////////////////////////////////////////

}

/*
 * Peephole simplification pass for a Vunit.
 */
void simplify(Vunit& unit) {
  assertx(check(unit));
  auto& blocks = unit.blocks;

  Env env { unit };
  env.use_counts.resize(unit.next_vr);
  env.def_insts.resize(unit.next_vr, Vinstr::nop);
  env.consts.resize(unit.next_vr);

  auto const labels = sortBlocks(unit);

  // Set up Env, only visiting reachable blocks.
  for (auto const b : labels) {
    assertx(!blocks[b].code.empty());

    for (auto const& inst : blocks[b].code) {
      visitDefs(unit, inst, [&] (Vreg r) { env.def_insts[r] = inst.op; });
      visitUses(unit, inst, [&] (Vreg r) { ++env.use_counts[r]; });
    }
  }

  for (auto const& p : env.unit.regToConst) {
    env.consts[p.first] = p.second;
  }

  // The simplify() implementations may allocate scratch blocks and modify
  // instruction streams, so we cannot use standard iterators here.
  for (auto const b : labels) {
    for (size_t i = 0; i < blocks[b].code.size(); ++i) {
      // Simplify at this index until no changes are made.
      while (simplify(env, b, i) || simplify_arch(env, b, i)) {
        // Stop if we simplified away the tail of the block.
        if (i >= blocks[b].code.size()) break;
      }
    }
  }

  printUnit(kVasmSimplifyLevel, "after vasm simplify", unit);
}

/*
 * Peephole simplification pass after register allocation, for opportunities
 * that either require physical regs or are created by the register allocator.
 */
void postRASimplify(Vunit& unit) {
  assertx(check(unit));
  auto& blocks = unit.blocks;

  Env env { unit };
  auto const labels = sortBlocks(unit);

  // The psimplify() implementations may allocate scratch blocks and modify
  // instruction streams, so we cannot use standard iterators here.
  for (auto const b : labels) {
    for (size_t i = 0; i < blocks[b].code.size(); ++i) {
      // Simplify at this index until no changes are made.
      while (psimplify(env, b, i)) {
        // Stop if we simplified away the tail of the block.
        if (i >= blocks[b].code.size()) break;
      }
    }
  }

  printUnit(kVasmSimplifyLevel, "after vasm postRASimplify", unit);
}

///////////////////////////////////////////////////////////////////////////////

}