bool IRCode::try_sync()

in libredex/IRCode.cpp [856:1102]


bool IRCode::try_sync(DexCode* code) {
  std::unordered_map<MethodItemEntry*, uint32_t> entry_to_addr;
  uint32_t addr = 0;
  // Step 1, regenerate opcode list for the method, and
  // and calculate the opcode entries address offsets.
  TRACE(MTRANS, 5, "Emitting opcodes");
  for (auto miter = m_ir_list->begin(); miter != m_ir_list->end(); ++miter) {
    MethodItemEntry* mentry = &*miter;
    TRACE(MTRANS, 5, "Analyzing mentry %p", mentry);
    entry_to_addr[mentry] = addr;
    if (mentry->type == MFLOW_DEX_OPCODE) {
      TRACE(MTRANS, 5, "Emitting mentry %p at %08x", mentry, addr);
      addr += mentry->dex_insn->size();
    }
  }
  // Step 2, Branch relaxation: calculate branch offsets for if-* and goto
  // opcodes, resizing them where necessary. Since resizing opcodes affects
  // address offsets, we need to iterate this to a fixed point.
  //
  // For instructions that use address offsets but never need resizing (i.e.
  // switch and fill-array-data opcodes), we calculate their offsets after
  // we have reached the fixed point.
  TRACE(MTRANS, 5, "Recalculating branches");
  std::vector<MethodItemEntry*> multi_branches;
  std::unordered_map<MethodItemEntry*, std::vector<BranchTarget*>> multis;
  std::unordered_map<BranchTarget*, uint32_t> multi_targets;
  bool needs_resync = false;
  for (auto miter = m_ir_list->begin(); miter != m_ir_list->end(); ++miter) {
    MethodItemEntry* mentry = &*miter;
    if (entry_to_addr.find(mentry) == entry_to_addr.end()) {
      continue;
    }
    if (mentry->type == MFLOW_DEX_OPCODE) {
      auto opcode = mentry->dex_insn->opcode();
      if (dex_opcode::is_switch(opcode)) {
        multi_branches.push_back(mentry);
      }
    }
    if (mentry->type == MFLOW_TARGET) {
      BranchTarget* bt = mentry->target;
      if (bt->type == BRANCH_MULTI) {
        multis[bt->src].push_back(bt);
        multi_targets[bt] = entry_to_addr.at(mentry);
        // We can't fix the primary switch opcodes address until we emit
        // the fopcode, which comes later.
      } else if (bt->type == BRANCH_SIMPLE &&
                 dex_opcode::is_branch(bt->src->dex_insn->opcode())) {
        MethodItemEntry* branch_op_mie = bt->src;
        auto branch_addr = entry_to_addr.find(branch_op_mie);
        always_assert_log(branch_addr != entry_to_addr.end(),
                          "%s refers to nonexistent branch instruction",
                          SHOW(*mentry));
        int32_t branch_offset = entry_to_addr.at(mentry) - branch_addr->second;
        needs_resync |= !encode_offset(m_ir_list, mentry, branch_offset);
      }
    }
  }
  if (needs_resync) {
    return false;
  }

  size_t num_align_nops{0};
  auto& opout = code->reset_instructions();
  for (auto& mie : *m_ir_list) {
    // We are assuming that fill-array-data-payload opcodes are always at
    // the end of the opcode stream (we enforce that during instruction
    // lowering). I.e. they are only followed by other fill-array-data-payload
    // opcodes. So adjusting their addresses here does not require re-running
    // branch relaxation.
    entry_to_addr.at(&mie) += num_align_nops;
    if (mie.type == MFLOW_TARGET &&
        mie.target->src->dex_insn->opcode() == DOPCODE_FILL_ARRAY_DATA) {
      // This MFLOW_TARGET is right before a fill-array-data-payload opcode,
      // so we should make sure its address is aligned
      if (entry_to_addr.at(&mie) & 1) {
        opout.push_back(new DexInstruction(DOPCODE_NOP));
        ++entry_to_addr.at(&mie);
        ++num_align_nops;
      }
      mie.target->src->dex_insn->set_offset(entry_to_addr.at(&mie) -
                                            entry_to_addr.at(mie.target->src));
      continue;
    }
    if (mie.type != MFLOW_DEX_OPCODE) {
      continue;
    }
    TRACE(MTRANS, 6, "Emitting insn %s", SHOW(mie.dex_insn));
    opout.push_back(mie.dex_insn);
  }
  addr += num_align_nops;

  TRACE(MTRANS, 5, "Emitting multi-branches");
  // Step 3, generate multi-branch fopcodes
  for (auto multiopcode : multi_branches) {
    auto& targets = multis[multiopcode];
    auto multi_insn = multiopcode->dex_insn;
    std::sort(targets.begin(), targets.end(), multi_target_compare_case_key);
    always_assert_log(!targets.empty(), "need to have targets for %s",
                      SHOW(*multiopcode));
    if (multiopcode->dex_insn->opcode() == DOPCODE_SPARSE_SWITCH) {
      // Emit align nop
      if (addr & 1) {
        DexInstruction* nop = new DexInstruction(DOPCODE_NOP);
        opout.push_back(nop);
        addr++;
      }

      // TODO: Any integrity checks should really be in the DexOpcodeData
      //       constructors, which do not know right now about the formats.

      // Emit sparse.

      // Note: the count does *not* need to fit into 16 bits, but the targets
      //       do.
      always_assert(targets.size() <= std::numeric_limits<uint16_t>::max());

      // Be legible, the compiler should improve this:
      //    * opcode
      //    * "size" = targets
      //    * target keys x int in shorts
      //    * target offsets x int in shorts
      const size_t count = 1 + 1 + 2 * targets.size() * (4 / 2);
      auto sparse_payload = std::make_unique<uint16_t[]>(count);

      sparse_payload[0] = FOPCODE_SPARSE_SWITCH;

      sparse_payload[1] = (uint16_t)targets.size();
      uint32_t* spkeys = (uint32_t*)&sparse_payload[2];
      uint32_t* sptargets =
          (uint32_t*)&sparse_payload[2 + (targets.size() * 2)];

      for (BranchTarget* target : targets) {
        *spkeys++ = target->case_key;
        *sptargets++ = multi_targets[target] - entry_to_addr.at(multiopcode);
      }

      // Insert the new fopcode...
      DexInstruction* fop = new DexOpcodeData(sparse_payload.get(), count - 1);
      opout.push_back(fop);
      // re-write the source opcode with the address of the
      // fopcode, increment the address of the fopcode.
      multi_insn->set_offset(addr - entry_to_addr.at(multiopcode));
      multi_insn->set_opcode(DOPCODE_SPARSE_SWITCH);
      addr += count;
    } else {
      // Emit packed.
      std::vector<int32_t> case_keys;
      case_keys.reserve(targets.size());
      for (const BranchTarget* t : targets) {
        case_keys.push_back(t->case_key);
      }
      const uint64_t size =
          instruction_lowering::get_packed_switch_size(case_keys);
      always_assert(size <= std::numeric_limits<uint16_t>::max());
      const size_t count = (size * 2) + 4;
      auto packed_payload = std::make_unique<uint16_t[]>(count);
      packed_payload[0] = FOPCODE_PACKED_SWITCH;
      packed_payload[1] = size;
      uint32_t* psdata = (uint32_t*)&packed_payload[2];
      int32_t next_key = *psdata++ = targets.front()->case_key;
      redex_assert(targets.front()->case_key <= targets.back()->case_key);
      for (BranchTarget* target : targets) {
        // Fill in holes with relative offsets that are falling through to the
        // instruction after the switch instruction
        for (; next_key != target->case_key; ++next_key) {
          *psdata++ = 3; // packed-switch statement is three code units
        }
        *psdata++ = multi_targets[target] - entry_to_addr.at(multiopcode);
        auto update_next = [&]() NO_SIGNED_INT_OVERFLOW { ++next_key; };
        update_next();
      }
      // Emit align nop
      if (addr & 1) {
        DexInstruction* nop = new DexInstruction(DOPCODE_NOP);
        opout.push_back(nop);
        addr++;
      }
      // Insert the new fopcode...
      DexInstruction* fop = new DexOpcodeData(packed_payload.get(), count - 1);
      opout.push_back(fop);
      // re-write the source opcode with the address of the
      // fopcode, increment the address of the fopcode.
      multi_insn->set_offset(addr - entry_to_addr.at(multiopcode));
      multi_insn->set_opcode(DOPCODE_PACKED_SWITCH);
      addr += count;
    }
  }

  // Step 4, emit debug entries
  TRACE(MTRANS, 5, "Emitting debug entries");
  auto debugitem = code->get_debug_item();
  if (debugitem) {
    gather_debug_entries(m_ir_list, entry_to_addr, &debugitem->get_entries());
  }
  // Step 5, try/catch blocks
  TRACE(MTRANS, 5, "Emitting try items & catch handlers");
  auto& tries = code->get_tries();
  tries.clear();
  MethodItemEntry* active_try = nullptr;
  for (auto& mentry : *m_ir_list) {
    if (mentry.type != MFLOW_TRY) {
      continue;
    }
    auto& tentry = mentry.tentry;
    if (tentry->type == TRY_START) {
      always_assert(active_try == nullptr);
      active_try = &mentry;
      continue;
    }
    redex_assert(tentry->type == TRY_END);
    auto try_end = &mentry;
    auto try_start = active_try;
    active_try = nullptr;

    always_assert_log(try_start != nullptr, "unopened try_end found: %s",
                      SHOW(*try_end));
    always_assert_log(try_start->tentry->catch_start ==
                          try_end->tentry->catch_start,
                      "mismatched try start (%s) and end (%s)",
                      SHOW(*try_start),
                      SHOW(*try_end));
    auto start_addr = entry_to_addr.at(try_start);
    auto end_addr = entry_to_addr.at(try_end);
    if (start_addr == end_addr) {
      continue;
    }

    DexCatches catches;
    for (auto mei = try_end->tentry->catch_start; mei != nullptr;
         mei = mei->centry->next) {
      if (mei->centry->next != nullptr) {
        always_assert(mei->centry->catch_type != nullptr);
      }
      catches.emplace_back(mei->centry->catch_type, entry_to_addr.at(mei));
    }
    split_and_insert_try_regions(start_addr, end_addr, catches, &tries);
  }
  always_assert_log(active_try == nullptr, "unclosed try_start found");

  std::sort(tries.begin(),
            tries.end(),
            [](const std::unique_ptr<DexTryItem>& a,
               const std::unique_ptr<DexTryItem>& b) {
              return a->m_start_addr < b->m_start_addr;
            });
  return true;
}