// From libredex/IRCode.cpp [856:1102]
// Attempts to lower this IRCode's instruction list into `code`'s DexInstruction
// stream. This performs one pass of branch relaxation: it assigns an address to
// every entry, encodes branch offsets, and — if any if-*/goto instruction had
// to be resized to fit its offset — returns false so the caller can re-run the
// sync (addresses shift when instructions grow, so this must iterate to a
// fixed point). Once offsets are stable, it emits the instruction stream,
// switch payload fopcodes (with alignment nops), debug entries, and try/catch
// items.
//
// @param code  the DexCode to populate; its instruction list and try list are
//              reset and overwritten.
// @return false if a branch was resized and the caller must call again;
//         true when the method has been fully synced into `code`.
bool IRCode::try_sync(DexCode* code) {
  // Maps each MethodItemEntry to its code-unit address in the output stream.
  std::unordered_map<MethodItemEntry*, uint32_t> entry_to_addr;
  uint32_t addr = 0;
  // Step 1, regenerate opcode list for the method, and
  // calculate the opcode entries' address offsets.
  TRACE(MTRANS, 5, "Emitting opcodes");
  for (auto miter = m_ir_list->begin(); miter != m_ir_list->end(); ++miter) {
    MethodItemEntry* mentry = &*miter;
    TRACE(MTRANS, 5, "Analyzing mentry %p", mentry);
    // Non-opcode entries (targets, tries, debug positions) share the address
    // of the next opcode; only real dex opcodes advance `addr`.
    entry_to_addr[mentry] = addr;
    if (mentry->type == MFLOW_DEX_OPCODE) {
      TRACE(MTRANS, 5, "Emitting mentry %p at %08x", mentry, addr);
      addr += mentry->dex_insn->size();
    }
  }
  // Step 2, Branch relaxation: calculate branch offsets for if-* and goto
  // opcodes, resizing them where necessary. Since resizing opcodes affects
  // address offsets, we need to iterate this to a fixed point.
  //
  // For instructions that use address offsets but never need resizing (i.e.
  // switch and fill-array-data opcodes), we calculate their offsets after
  // we have reached the fixed point.
  TRACE(MTRANS, 5, "Recalculating branches");
  // Switch opcodes, in stream order, whose payload fopcodes are emitted in
  // Step 3 below.
  std::vector<MethodItemEntry*> multi_branches;
  // switch opcode -> its case targets; case target -> its address.
  std::unordered_map<MethodItemEntry*, std::vector<BranchTarget*>> multis;
  std::unordered_map<BranchTarget*, uint32_t> multi_targets;
  bool needs_resync = false;
  for (auto miter = m_ir_list->begin(); miter != m_ir_list->end(); ++miter) {
    MethodItemEntry* mentry = &*miter;
    if (entry_to_addr.find(mentry) == entry_to_addr.end()) {
      continue;
    }
    if (mentry->type == MFLOW_DEX_OPCODE) {
      auto opcode = mentry->dex_insn->opcode();
      if (dex_opcode::is_switch(opcode)) {
        multi_branches.push_back(mentry);
      }
    }
    if (mentry->type == MFLOW_TARGET) {
      BranchTarget* bt = mentry->target;
      if (bt->type == BRANCH_MULTI) {
        // Record switch case targets for Step 3.
        multis[bt->src].push_back(bt);
        multi_targets[bt] = entry_to_addr.at(mentry);
        // We can't fix the primary switch opcodes address until we emit
        // the fopcode, which comes later.
      } else if (bt->type == BRANCH_SIMPLE &&
                 dex_opcode::is_branch(bt->src->dex_insn->opcode())) {
        MethodItemEntry* branch_op_mie = bt->src;
        auto branch_addr = entry_to_addr.find(branch_op_mie);
        always_assert_log(branch_addr != entry_to_addr.end(),
                          "%s refers to nonexistent branch instruction",
                          SHOW(*mentry));
        // Dex branch offsets are relative to the branch instruction itself.
        int32_t branch_offset = entry_to_addr.at(mentry) - branch_addr->second;
        // encode_offset returns false if it had to resize the instruction to
        // fit the offset; any resize invalidates all computed addresses.
        needs_resync |= !encode_offset(m_ir_list, mentry, branch_offset);
      }
    }
  }
  if (needs_resync) {
    // Caller re-runs try_sync with the resized instructions until no further
    // resizing is needed (fixed point).
    return false;
  }
  // Number of alignment nops inserted so far; shifts all subsequent addresses.
  size_t num_align_nops{0};
  auto& opout = code->reset_instructions();
  for (auto& mie : *m_ir_list) {
    // We are assuming that fill-array-data-payload opcodes are always at
    // the end of the opcode stream (we enforce that during instruction
    // lowering). I.e. they are only followed by other fill-array-data-payload
    // opcodes. So adjusting their addresses here does not require re-running
    // branch relaxation.
    entry_to_addr.at(&mie) += num_align_nops;
    if (mie.type == MFLOW_TARGET &&
        mie.target->src->dex_insn->opcode() == DOPCODE_FILL_ARRAY_DATA) {
      // This MFLOW_TARGET is right before a fill-array-data-payload opcode,
      // so we should make sure its address is aligned (payloads must sit at
      // even code-unit addresses per the dex format).
      if (entry_to_addr.at(&mie) & 1) {
        opout.push_back(new DexInstruction(DOPCODE_NOP));
        ++entry_to_addr.at(&mie);
        ++num_align_nops;
      }
      // Patch the fill-array-data instruction to point at its payload.
      mie.target->src->dex_insn->set_offset(entry_to_addr.at(&mie) -
                                            entry_to_addr.at(mie.target->src));
      continue;
    }
    if (mie.type != MFLOW_DEX_OPCODE) {
      continue;
    }
    TRACE(MTRANS, 6, "Emitting insn %s", SHOW(mie.dex_insn));
    opout.push_back(mie.dex_insn);
  }
  // `addr` now points just past the last emitted instruction; switch payloads
  // are appended from here.
  addr += num_align_nops;
  TRACE(MTRANS, 5, "Emitting multi-branches");
  // Step 3, generate multi-branch fopcodes
  for (auto multiopcode : multi_branches) {
    auto& targets = multis[multiopcode];
    auto multi_insn = multiopcode->dex_insn;
    // Payload entries must be in ascending case-key order.
    std::sort(targets.begin(), targets.end(), multi_target_compare_case_key);
    always_assert_log(!targets.empty(), "need to have targets for %s",
                      SHOW(*multiopcode));
    if (multiopcode->dex_insn->opcode() == DOPCODE_SPARSE_SWITCH) {
      // Emit align nop: payload fopcodes must start at an even address.
      if (addr & 1) {
        DexInstruction* nop = new DexInstruction(DOPCODE_NOP);
        opout.push_back(nop);
        addr++;
      }
      // TODO: Any integrity checks should really be in the DexOpcodeData
      // constructors, which do not know right now about the formats.
      // Emit sparse.
      // Note: the count does *not* need to fit into 16 bits, but the targets
      // do.
      always_assert(targets.size() <= std::numeric_limits<uint16_t>::max());
      // Payload size in 16-bit code units. Be legible, the compiler should
      // improve this:
      // * opcode
      // * "size" = targets
      // * target keys x int in shorts
      // * target offsets x int in shorts
      const size_t count = 1 + 1 + 2 * targets.size() * (4 / 2);
      auto sparse_payload = std::make_unique<uint16_t[]>(count);
      sparse_payload[0] = FOPCODE_SPARSE_SWITCH;
      sparse_payload[1] = (uint16_t)targets.size();
      // Keys array followed by the offsets array, both of 32-bit values
      // stored in the 16-bit payload buffer.
      uint32_t* spkeys = (uint32_t*)&sparse_payload[2];
      uint32_t* sptargets =
          (uint32_t*)&sparse_payload[2 + (targets.size() * 2)];
      for (BranchTarget* target : targets) {
        *spkeys++ = target->case_key;
        // Offsets are relative to the switch instruction, not the payload.
        *sptargets++ = multi_targets[target] - entry_to_addr.at(multiopcode);
      }
      // Insert the new fopcode...
      DexInstruction* fop = new DexOpcodeData(sparse_payload.get(), count - 1);
      opout.push_back(fop);
      // re-write the source opcode with the address of the
      // fopcode, increment the address of the fopcode.
      multi_insn->set_offset(addr - entry_to_addr.at(multiopcode));
      multi_insn->set_opcode(DOPCODE_SPARSE_SWITCH);
      addr += count;
    } else {
      // Emit packed.
      std::vector<int32_t> case_keys;
      case_keys.reserve(targets.size());
      for (const BranchTarget* t : targets) {
        case_keys.push_back(t->case_key);
      }
      // Number of 32-bit target slots: spans [first_key, last_key], including
      // holes for absent keys.
      const uint64_t size =
          instruction_lowering::get_packed_switch_size(case_keys);
      always_assert(size <= std::numeric_limits<uint16_t>::max());
      // Payload in 16-bit units: opcode + size + first_key (2 units) +
      // `size` targets x 2 units.
      const size_t count = (size * 2) + 4;
      auto packed_payload = std::make_unique<uint16_t[]>(count);
      packed_payload[0] = FOPCODE_PACKED_SWITCH;
      packed_payload[1] = size;
      uint32_t* psdata = (uint32_t*)&packed_payload[2];
      // First 32-bit slot holds the lowest case key; targets follow.
      int32_t next_key = *psdata++ = targets.front()->case_key;
      redex_assert(targets.front()->case_key <= targets.back()->case_key);
      for (BranchTarget* target : targets) {
        // Fill in holes with relative offsets that are falling through to the
        // instruction after the switch instruction
        for (; next_key != target->case_key; ++next_key) {
          *psdata++ = 3; // packed-switch statement is three code units
        }
        *psdata++ = multi_targets[target] - entry_to_addr.at(multiopcode);
        // case_key may be INT32_MAX; the wrapping increment is intentional
        // and annotated to suppress signed-overflow sanitizers.
        auto update_next = [&]() NO_SIGNED_INT_OVERFLOW { ++next_key; };
        update_next();
      }
      // Emit align nop: payload fopcodes must start at an even address.
      if (addr & 1) {
        DexInstruction* nop = new DexInstruction(DOPCODE_NOP);
        opout.push_back(nop);
        addr++;
      }
      // Insert the new fopcode...
      DexInstruction* fop = new DexOpcodeData(packed_payload.get(), count - 1);
      opout.push_back(fop);
      // re-write the source opcode with the address of the
      // fopcode, increment the address of the fopcode.
      multi_insn->set_offset(addr - entry_to_addr.at(multiopcode));
      multi_insn->set_opcode(DOPCODE_PACKED_SWITCH);
      addr += count;
    }
  }
  // Step 4, emit debug entries
  TRACE(MTRANS, 5, "Emitting debug entries");
  auto debugitem = code->get_debug_item();
  if (debugitem) {
    gather_debug_entries(m_ir_list, entry_to_addr, &debugitem->get_entries());
  }
  // Step 5, try/catch blocks: pair each TRY_START with its TRY_END and emit
  // DexTryItems covering the enclosed address range.
  TRACE(MTRANS, 5, "Emitting try items & catch handlers");
  auto& tries = code->get_tries();
  tries.clear();
  // The TRY_START currently awaiting its TRY_END; tries must not nest here.
  MethodItemEntry* active_try = nullptr;
  for (auto& mentry : *m_ir_list) {
    if (mentry.type != MFLOW_TRY) {
      continue;
    }
    auto& tentry = mentry.tentry;
    if (tentry->type == TRY_START) {
      always_assert(active_try == nullptr);
      active_try = &mentry;
      continue;
    }
    redex_assert(tentry->type == TRY_END);
    auto try_end = &mentry;
    auto try_start = active_try;
    active_try = nullptr;
    always_assert_log(try_start != nullptr, "unopened try_end found: %s",
                      SHOW(*try_end));
    always_assert_log(try_start->tentry->catch_start ==
                          try_end->tentry->catch_start,
                      "mismatched try start (%s) and end (%s)",
                      SHOW(*try_start),
                      SHOW(*try_end));
    auto start_addr = entry_to_addr.at(try_start);
    auto end_addr = entry_to_addr.at(try_end);
    if (start_addr == end_addr) {
      // Empty try region covers no instructions; drop it.
      continue;
    }
    // Walk the catch-handler chain; only the last entry may be a catch-all
    // (null catch_type).
    DexCatches catches;
    for (auto mei = try_end->tentry->catch_start; mei != nullptr;
         mei = mei->centry->next) {
      if (mei->centry->next != nullptr) {
        always_assert(mei->centry->catch_type != nullptr);
      }
      catches.emplace_back(mei->centry->catch_type, entry_to_addr.at(mei));
    }
    split_and_insert_try_regions(start_addr, end_addr, catches, &tries);
  }
  always_assert_log(active_try == nullptr, "unclosed try_start found");
  // The dex format requires try items sorted by start address.
  std::sort(tries.begin(),
            tries.end(),
            [](const std::unique_ptr<DexTryItem>& a,
               const std::unique_ptr<DexTryItem>& b) {
              return a->m_start_addr < b->m_start_addr;
            });
  return true;
}