bool CommonSubexpressionElimination::patch()

in service/cse/CommonSubexpressionElimination.cpp [1506:1719]


bool CommonSubexpressionElimination::patch(bool runtime_assertions) {
  if (m_forward.empty()) {
    return false;
  }

  TRACE(CSE, 5, "[CSE] before:\n%s", SHOW(m_cfg));

  // gather relevant instructions, and allocate temp registers

  // We'll allocate one temp per "earlier_insns_index".
  // TODO: Do better, use less. A subset and its superset can share a temp.
  std::unordered_map<size_t, std::pair<IROpcode, reg_t>> temps;
  // We also remember for which instructions we'll need an iterator, as we'll
  // want to insert something after them.
  std::unordered_set<const IRInstruction*> iterator_insns;
  std::unordered_set<const IRInstruction*> combined_earlier_insns;
  for (const auto& f : m_forward) {
    iterator_insns.insert(f.insn);
    if (temps.count(f.earlier_insns_index)) {
      continue;
    }
    auto& earlier_insns = m_earlier_insns.at(f.earlier_insns_index);
    auto move_or_const_literal = get_move_or_const_literal(earlier_insns);
    auto opcode = move_or_const_literal.first;
    if (!opcode::is_a_move(opcode)) {
      continue;
    }
    combined_earlier_insns.insert(earlier_insns.begin(), earlier_insns.end());
    reg_t temp_reg = opcode == OPCODE_MOVE_WIDE ? m_cfg.allocate_wide_temp()
                                                : m_cfg.allocate_temp();
    temps.emplace(f.earlier_insns_index, std::make_pair(opcode, temp_reg));
  }
  for (auto earlier_insn : combined_earlier_insns) {
    iterator_insns.insert(earlier_insn);
    if (earlier_insn->has_dest()) {
      m_stats.results_captured++;
    } else if (earlier_insn->opcode() == OPCODE_NEW_ARRAY) {
      m_stats.array_lengths_captured++;
    } else {
      always_assert(opcode::is_an_aput(earlier_insn->opcode()) ||
                    opcode::is_an_iput(earlier_insn->opcode()) ||
                    opcode::is_an_sput(earlier_insn->opcode()));
      m_stats.stores_captured++;
    }
  }

  for (auto unboxing_insn : m_unboxing) {
    iterator_insns.insert(unboxing_insn);
  }

  // find all iterators in one sweep

  std::unordered_map<IRInstruction*, cfg::InstructionIterator> iterators;
  auto iterable = cfg::InstructionIterable(m_cfg);
  for (auto it = iterable.begin(); it != iterable.end(); ++it) {
    auto* insn = it->insn;
    if (iterator_insns.count(insn)) {
      iterators.emplace(insn, it);
    }
  }

  // insert moves to use the forwarded value

  std::vector<std::pair<Forward, IRInstruction*>> to_check;
  for (const auto& f : m_forward) {
    auto& earlier_insns = m_earlier_insns.at(f.earlier_insns_index);

    IRInstruction* insn = f.insn;
    auto& it = iterators.at(insn);

    auto temp_it = temps.find(f.earlier_insns_index);
    if (temp_it == temps.end()) {
      auto move_or_const_literal = get_move_or_const_literal(earlier_insns);
      auto const_opcode = move_or_const_literal.first;
      always_assert(opcode::is_a_literal_const(const_opcode));
      auto literal = *(move_or_const_literal.second);

      // Consider this case:
      //   1. (const v0 0)
      //   2. (iput-object v0 v3 "LClass1;.field:Ljava/lang/Object;")
      //   3. (const v0 0)
      //   4. boxing-unboxing v0
      //   5. (move-result v0)
      //   6. (iput-boolean v0 v4 "LClass2;.field:Z")
      // We need to make sure that after opt-out "boxing-unboxing v0", v0 used
      // in insn 6. should not be the one used in insn 2, since in line 2, v0 is
      // used as null-0 and in line 4, v0 should be int. Therfore, instead of
      // insert a move, if there is any literal_const load in earlier_insns, we
      // just clone that const insn and override the dest to current insn's
      // dest. No need to add this new insn for runtime-assertion since it is
      // just a clone.
      IRInstruction* const_insn = new IRInstruction(const_opcode);
      const_insn->set_literal(literal)->set_dest(insn->dest());
      auto iterators_invalidated = m_cfg.insert_after(it, const_insn);
      always_assert(!iterators_invalidated);

      for (auto earlier_insn : earlier_insns) {
        TRACE(CSE, 4, "[CSE] forwarding %s to %s as const %ld",
              SHOW(earlier_insn), SHOW(insn), literal);
      }
    } else {
      auto [move_opcode, temp_reg] = temp_it->second;
      always_assert(opcode::is_a_move(move_opcode));

      IRInstruction* move_insn = new IRInstruction(move_opcode);
      move_insn->set_src(0, temp_reg)->set_dest(insn->dest());
      auto iterators_invalidated = m_cfg.insert_after(it, move_insn);
      always_assert(!iterators_invalidated);
      if (runtime_assertions) {
        to_check.emplace_back(f, move_insn);
      }

      for (auto earlier_insn : earlier_insns) {
        TRACE(CSE, 4, "[CSE] forwarding %s to %s via v%u", SHOW(earlier_insn),
              SHOW(insn), temp_reg);
      }
    }

    if (opcode::is_move_result_any(insn->opcode())) {
      insn = m_cfg.primary_instruction_of_move_result(it)->insn;
      if (opcode::is_an_invoke(insn->opcode())) {
        TRACE(CSE, 3, "[CSE] eliminating invocation of %s",
              SHOW(insn->get_method()));
      }
    }
    m_stats.eliminated_opcodes[insn->opcode()]++;
  }

  // Insert moves to define the forwarded value.
  // We are going to call ControlFlow::insert_after, which may introduce new
  // blocks when inserting after the last instruction of a block with
  // throws-edges. To make that deterministic, we need to make sure that we
  // insert in a deterministic order. To this end, we follow the deterministic
  // m_forward order, and we order earlier instructions by their ids.
  for (const auto& f : m_forward) {
    auto temp_it = temps.find(f.earlier_insns_index);
    if (temp_it == temps.end()) {
      continue;
    }
    auto [move_opcode, temp_reg] = temp_it->second;
    always_assert(opcode::is_a_move(move_opcode));
    temps.erase(temp_it);

    auto& earlier_insns_unordered = m_earlier_insns.at(f.earlier_insns_index);
    std::vector<const IRInstruction*> earlier_insns_ordered(
        earlier_insns_unordered.begin(), earlier_insns_unordered.end());
    always_assert(!m_earlier_insn_ids.empty());
    std::sort(earlier_insns_ordered.begin(), earlier_insns_ordered.end(),
              [&](const auto* a, const auto* b) {
                return get_earlier_insn_id(a) < get_earlier_insn_id(b);
              });

    for (auto earlier_insn : earlier_insns_ordered) {
      auto& it = iterators.at(const_cast<IRInstruction*>(earlier_insn));
      IRInstruction* move_insn = new IRInstruction(move_opcode);
      auto src_reg = earlier_insn->has_dest() ? earlier_insn->dest()
                                              : earlier_insn->src(0);
      move_insn->set_src(0, src_reg)->set_dest(temp_reg);
      if (earlier_insn->opcode() == OPCODE_NEW_ARRAY) {
        // we need to capture the array-length register of a new-array
        // instruction *before* the instruction, as the dest of the instruction
        // may overwrite the incoming array length value
        auto iterators_invalidated = m_cfg.insert_before(it, move_insn);
        always_assert(!iterators_invalidated);
      } else {
        auto iterators_invalidated = m_cfg.insert_after(it, move_insn);
        always_assert(!iterators_invalidated);
      }
    }
  }
  always_assert(temps.empty());

  if (runtime_assertions) {
    insert_runtime_assertions(to_check);
  }

  TRACE(CSE, 5, "[CSE] after:\n%s", SHOW(m_cfg));

  m_stats.instructions_eliminated += m_forward.size();

  if (!m_unboxing.empty()) {
    // Inserting check-casts might split blocks and invalidate iterators, thus
    // we go through the CFGMutation helper class that properly deals with this
    // complication.

    cfg::CFGMutation mutation(m_cfg);
    // For the abs methods which are part of boxing-unboxing pattern, we refine
    // it to its impl, which will be viewed as "pure" method and can be optmized
    // out later.
    for (auto unboxing_insn : m_unboxing) {
      auto& it = iterators.at(unboxing_insn);
      auto method_ref = unboxing_insn->get_method();
      auto abs_it = std::find_if(
          m_abs_map.begin(), m_abs_map.end(),
          [method_ref](auto& p) { return p.second == method_ref; });
      always_assert(abs_it != m_abs_map.end());
      auto impl = abs_it->first;
      auto src_reg = unboxing_insn->src(0);
      auto check_cast_insn = (new IRInstruction(OPCODE_CHECK_CAST))
                                 ->set_src(0, src_reg)
                                 ->set_type(impl->get_class());
      auto pseudo_move_result_insn =
          (new IRInstruction(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT))
              ->set_dest(src_reg);
      // Add instructions
      mutation.insert_before(it, {check_cast_insn, pseudo_move_result_insn});
      // Replace the call method from abs_unboxing method to its impl.
      unboxing_insn->set_method(const_cast<DexMethodRef*>(impl));
    }
    mutation.flush();
  }

  return true;
}