libredex/IRTypeChecker.cpp (1,316 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include "IRTypeChecker.h" #include <boost/optional/optional.hpp> #include "Debug.h" #include "DexPosition.h" #include "DexUtil.h" #include "Match.h" #include "MonitorCount.h" #include "RedexContext.h" #include "Resolver.h" #include "ScopedCFG.h" #include "Show.h" #include "ShowCFG.h" #include "StlUtil.h" #include "Trace.h" using namespace sparta; using namespace type_inference; namespace { using namespace ir_analyzer; // We abort the type checking process at the first error encountered. class TypeCheckingException final : public std::runtime_error { public: explicit TypeCheckingException(const std::string& what_arg) : std::runtime_error(what_arg) {} }; std::ostringstream& print_register(std::ostringstream& out, reg_t reg) { if (reg == RESULT_REGISTER) { out << "result"; } else { out << "register v" << reg; } return out; } void check_type_match(reg_t reg, IRType actual, IRType expected) { if (actual == BOTTOM) { // There's nothing to do for unreachable code. return; } if (actual == SCALAR && expected != REFERENCE) { // If the type is SCALAR and we're checking compatibility with an integer // or float type, we just bail out. return; } if (!TypeDomain(actual).leq(TypeDomain(expected))) { std::ostringstream out; print_register(out, reg) << ": expected type " << expected << ", but found " << actual << " instead"; throw TypeCheckingException(out.str()); } } /* * There are cases where we cannot precisely infer the exception type for * MOVE_EXCEPTION. In these cases, we use Ljava/lang/Throwable; as a fallback * type. */ bool is_inference_fallback_type(const DexType* type) { return type == type::java_lang_Throwable(); } /* * We might not have the external DexClass to fully determine the hierarchy. * Therefore, be more lenient when assigning from or to external DexType. */ bool check_cast_helper(const DexType* from, const DexType* to) { // We can always cast to Object if (to == type::java_lang_Object()) { return true; } // We can never cast from Object to anything besides Object if (from == type::java_lang_Object() && from != to) { // TODO(T66567547) sanity check that type::check_cast would have agreed always_assert(!type::check_cast(from, to)); return false; } // If we have any external types (aside from Object and the other well known // types), allow them. auto from_cls = type_class(from); auto to_cls = type_class(to); if (!from_cls || !to_cls) { return true; } // Assume the type hierarchies of the well known external types are stable // across Android versions. When their class definitions present, perform the // regular type inheritance check. if ((from_cls->is_external() && !g_redex->pointers_cache().m_well_known_types.count(from)) || (to_cls->is_external() && !g_redex->pointers_cache().m_well_known_types.count(to))) { return true; } return type::check_cast(from, to); } // Type assignment check between two reference types. We assume that both `from` // and `to` are reference types. // Took reference from: // http://androidxref.com/6.0.1_r10/xref/art/runtime/verifier/reg_type-inl.h#88 // // Note: the expectation is that `from` and `to` are reference types, otherwise // the check fails. bool check_is_assignable_from(const DexType* from, const DexType* to, bool strict) { always_assert(from && to); always_assert_log(!type::is_primitive(from), "%s", SHOW(from)); if (type::is_primitive(from) || type::is_primitive(to)) { return false; // Expect types be a reference type. } if (from == to) { return true; // Fast path if the two are equal. } if (to == type::java_lang_Object()) { return true; // All reference types can be assigned to Object. } if (type::is_java_lang_object_array(to)) { // All reference arrays may be assigned to Object[] return type::is_reference_array(from); } if (type::is_array(from) && type::is_array(to)) { if (type::get_array_level(from) != type::get_array_level(to)) { return false; } auto efrom = type::get_array_element_type(from); auto eto = type::get_array_element_type(to); return check_cast_helper(efrom, eto); } if (!strict) { // If `to` is an interface, allow any assignment when non-strict. // This behavior is copied from AOSP. auto to_cls = type_class(to); if (to_cls != nullptr && is_interface(to_cls)) { return true; } } return check_cast_helper(from, to); } void check_wide_type_match(reg_t reg, IRType actual1, IRType actual2, IRType expected1, IRType expected2) { if (actual1 == BOTTOM) { // There's nothing to do for unreachable code. return; } if (actual1 == SCALAR1 && actual2 == SCALAR2) { // If type of the pair of registers is (SCALAR1, SCALAR2), we just bail // out. return; } if (!(TypeDomain(actual1).leq(TypeDomain(expected1)) && TypeDomain(actual2).leq(TypeDomain(expected2)))) { std::ostringstream out; print_register(out, reg) << ": expected type (" << expected1 << ", " << expected2 << "), but found (" << actual1 << ", " << actual2 << ") instead"; throw TypeCheckingException(out.str()); } } void assume_type(TypeEnvironment* state, reg_t reg, IRType expected, bool ignore_top = false) { if (state->is_bottom()) { // There's nothing to do for unreachable code. return; } IRType actual = state->get_type(reg).element(); if (ignore_top && actual == TOP) { return; } check_type_match(reg, actual, /* expected */ expected); } void assume_wide_type(TypeEnvironment* state, reg_t reg, IRType expected1, IRType expected2) { if (state->is_bottom()) { // There's nothing to do for unreachable code. return; } IRType actual1 = state->get_type(reg).element(); IRType actual2 = state->get_type(reg + 1).element(); check_wide_type_match(reg, actual1, actual2, /* expected1 */ expected1, /* expected2 */ expected2); } // This is used for the operand of a comparison operation with zero. The // complexity here is that this operation may be performed on either an // integer or a reference. void assume_comparable_with_zero(TypeEnvironment* state, reg_t reg) { if (state->is_bottom()) { // There's nothing to do for unreachable code. return; } IRType t = state->get_type(reg).element(); if (t == SCALAR) { // We can't say anything conclusive about a register that has SCALAR type, // so we just bail out. return; } if (!(TypeDomain(t).leq(TypeDomain(REFERENCE)) || TypeDomain(t).leq(TypeDomain(INT)))) { std::ostringstream out; print_register(out, reg) << ": expected integer or reference type, but found " << t << " instead"; throw TypeCheckingException(out.str()); } } // This is used for the operands of a comparison operation between two // registers. The complexity here is that this operation may be performed on // either two integers or two references. void assume_comparable(TypeEnvironment* state, reg_t reg1, reg_t reg2) { if (state->is_bottom()) { // There's nothing to do for unreachable code. return; } IRType t1 = state->get_type(reg1).element(); IRType t2 = state->get_type(reg2).element(); if (!((TypeDomain(t1).leq(TypeDomain(REFERENCE)) && TypeDomain(t2).leq(TypeDomain(REFERENCE))) || (TypeDomain(t1).leq(TypeDomain(SCALAR)) && TypeDomain(t2).leq(TypeDomain(SCALAR)) && (t1 != FLOAT) && (t2 != FLOAT)))) { // Two values can be used in a comparison operation if they either both // have the REFERENCE type or have non-float scalar types. Note that in // the case where one or both types have the SCALAR type, we can't // definitely rule out the absence of a type error. std::ostringstream out; print_register(out, reg1) << " and "; print_register(out, reg2) << ": incompatible types in comparison " << t1 << " and " << t2; throw TypeCheckingException(out.str()); } } void assume_integer(TypeEnvironment* state, reg_t reg) { assume_type(state, reg, /* expected */ INT); } void assume_float(TypeEnvironment* state, reg_t reg) { assume_type(state, reg, /* expected */ FLOAT); } void assume_long(TypeEnvironment* state, reg_t reg) { assume_wide_type(state, reg, /* expected1 */ LONG1, /* expected2 */ LONG2); } void assume_double(TypeEnvironment* state, reg_t reg) { assume_wide_type( state, reg, /* expected1 */ DOUBLE1, /* expected2 */ DOUBLE2); } void assume_wide_scalar(TypeEnvironment* state, reg_t reg) { assume_wide_type( state, reg, /* expected1 */ SCALAR1, /* expected2 */ SCALAR2); } class Result final { public: static Result Ok() { return Result(); } static Result make_error(const std::string& s) { return Result(s); } const std::string& error_message() const { always_assert(!is_ok); return m_error_message; } bool operator==(const Result& that) const { return is_ok == that.is_ok && m_error_message == that.m_error_message; } bool operator!=(const Result& that) const { return !(*this == that); } private: bool is_ok{true}; std::string m_error_message; explicit Result(const std::string& s) : is_ok(false), m_error_message(s) {} Result() = default; }; static bool has_move_result_pseudo(const MethodItemEntry& mie) { return mie.type == MFLOW_OPCODE && mie.insn->has_move_result_pseudo(); } static bool is_move_result_pseudo(const MethodItemEntry& mie) { return mie.type == MFLOW_OPCODE && opcode::is_a_move_result_pseudo(mie.insn->opcode()); } Result check_load_params(const DexMethod* method) { bool is_static_method = is_static(method); const auto* signature = method->get_proto()->get_args(); auto sig_it = signature->begin(); size_t load_insns_cnt = 0; auto handle_instance = [&](IRInstruction* insn) -> boost::optional<std::string> { // Must be a param-object. if (insn->opcode() != IOPCODE_LOAD_PARAM_OBJECT) { return std::string( "First parameter must be loaded with load-param-object: ") + show(insn); } return boost::none; }; auto handle_other = [&](IRInstruction* insn) -> boost::optional<std::string> { if (sig_it == signature->end()) { return std::string("Not enough argument types for ") + show(insn); } bool ok = false; switch (insn->opcode()) { case IOPCODE_LOAD_PARAM_OBJECT: ok = type::is_object(*sig_it); break; case IOPCODE_LOAD_PARAM: ok = type::is_primitive(*sig_it) && !type::is_wide_type(*sig_it); break; case IOPCODE_LOAD_PARAM_WIDE: ok = type::is_primitive(*sig_it) && type::is_wide_type(*sig_it); break; default: not_reached(); } if (!ok) { return std::string("Incompatible load-param ") + show(insn) + " for " + type::type_shorty(*sig_it); } ++sig_it; return boost::none; }; bool non_load_param_seen = false; using handler_t = std::function<boost::optional<std::string>(IRInstruction*)>; handler_t handler = is_static_method ? handler_t(handle_other) : handler_t(handle_instance); for (const auto& mie : InstructionIterable(method->get_code()->cfg().entry_block())) { IRInstruction* insn = mie.insn; if (!opcode::is_a_load_param(insn->opcode())) { non_load_param_seen = true; continue; } ++load_insns_cnt; if (non_load_param_seen) { return Result::make_error("Saw non-load-param instruction before " + show(insn)); } auto res = handler(insn); if (res) { return Result::make_error(res.get()); } // Instance methods have an extra 'load-param' at the beginning for the // instance object. // Once we've checked that, though, the rest is the same so move on to // using 'handle_other' in all cases. handler = handler_t(handle_other); } size_t expected_load_params_cnt = method->get_proto()->get_args()->size() + !is_static_method; if (load_insns_cnt != expected_load_params_cnt) { return Result::make_error( "Number of existing load-param instructions (" + show(load_insns_cnt) + ") is lower than expected (" + show(expected_load_params_cnt) + ")"); } return Result::Ok(); } // Every variable created by a new-instance call should be initialized by a // proper invoke-direct <init>. Here, we perform simple check to find some // missing calls resulting in use of uninitialized variables. We correctly track // variables in a basic block, the most common form of allocation+init. Result check_uninitialized(const DexMethod* method) { auto* code = method->get_code(); std::map<uint16_t, IRInstruction*> uninitialized_regs; std::map<IRInstruction*, std::set<uint16_t>> uninitialized_regs_rev; auto remove_from_uninitialized_list = [&](uint16_t reg) { auto it = uninitialized_regs.find(reg); if (it != uninitialized_regs.end()) { uninitialized_regs_rev[it->second].erase(reg); uninitialized_regs.erase(reg); } }; for (auto it = code->begin(); it != code->end(); ++it) { if (it->type != MFLOW_OPCODE) { continue; } auto* insn = it->insn; auto op = insn->opcode(); if (op == OPCODE_NEW_INSTANCE) { ++it; while (it != code->end() && it->type != MFLOW_OPCODE) ++it; if (it == code->end() || it->insn->opcode() != IOPCODE_MOVE_RESULT_PSEUDO_OBJECT) { auto prev = it; prev--; return Result::make_error("No opcode-move-result after new-instance " + show(*prev) + " in \n" + show(code->cfg())); } auto reg_dest = it->insn->dest(); remove_from_uninitialized_list(reg_dest); uninitialized_regs[reg_dest] = insn; uninitialized_regs_rev[insn].insert(reg_dest); continue; } if (opcode::is_a_move(op) && !opcode::is_move_result_any(op)) { assert(insn->srcs().size() > 0); auto src = insn->srcs()[0]; auto dest = insn->dest(); if (src == dest) continue; auto it_src = uninitialized_regs.find(src); // We no longer care about the old dest remove_from_uninitialized_list(dest); // But if src was uninitialized, dest is now too if (it_src != uninitialized_regs.end()) { uninitialized_regs[dest] = it_src->second; uninitialized_regs_rev[it_src->second].insert(dest); } continue; } auto create_error = [&](const IRInstruction* instruction, const IRCode* code) { return Result::make_error("Use of uninitialized variable " + show(instruction) + " detected at " + show(*it) + " in \n" + show(code->cfg())); }; if (op == OPCODE_INVOKE_DIRECT) { auto const& sources = insn->srcs(); auto object = sources[0]; auto object_it = uninitialized_regs.find(object); if (object_it != uninitialized_regs.end()) { auto* object_ir = object_it->second; if (insn->get_method()->get_name()->str() != "<init>") { return create_error(object_ir, code); } if (insn->get_method()->get_class()->str() != object_ir->get_type()->str()) { return Result::make_error("Variable " + show(object_ir) + "initialized with the wrong type at " + show(*it) + " in \n" + show(code->cfg())); } for (auto reg : uninitialized_regs_rev[object_ir]) { uninitialized_regs.erase(reg); } uninitialized_regs_rev.erase(object_ir); } for (unsigned int i = 1; i < sources.size(); i++) { auto u_it = uninitialized_regs.find(sources[i]); if (u_it != uninitialized_regs.end()) return create_error(u_it->second, code); } continue; } auto const& sources = insn->srcs(); for (auto reg : sources) { auto u_it = uninitialized_regs.find(reg); if (u_it != uninitialized_regs.end()) return create_error(u_it->second, code); } if (insn->has_dest()) remove_from_uninitialized_list(insn->dest()); // We clear the structures after any branch, this doesn't cover all the // possible issues, but is simple auto branchingness = opcode::branchingness(op); if (op == OPCODE_THROW || (branchingness != opcode::Branchingness::BRANCH_NONE && branchingness != opcode::Branchingness::BRANCH_THROW)) { uninitialized_regs.clear(); uninitialized_regs_rev.clear(); } } return Result::Ok(); } /* * Do a linear pass to sanity-check the structure of the bytecode. */ Result check_structure(const DexMethod* method, bool check_no_overwrite_this) { check_no_overwrite_this &= !is_static(method); auto* code = method->get_code(); IRInstruction* this_insn = nullptr; bool has_seen_non_load_param_opcode{false}; for (auto it = code->begin(); it != code->end(); ++it) { // XXX we are using IRList::iterator instead of InstructionIterator here // because the latter does not support reverse iteration if (it->type != MFLOW_OPCODE) { continue; } auto* insn = it->insn; auto op = insn->opcode(); if (has_seen_non_load_param_opcode && opcode::is_a_load_param(op)) { return Result::make_error("Encountered " + show(*it) + " not at the start of the method"); } has_seen_non_load_param_opcode = !opcode::is_a_load_param(op); if (check_no_overwrite_this) { if (op == IOPCODE_LOAD_PARAM_OBJECT && this_insn == nullptr) { this_insn = insn; } else if (insn->has_dest() && insn->dest() == this_insn->dest()) { return Result::make_error( "Encountered overwrite of `this` register by " + show(insn)); } } // The instruction immediately before a move-result instruction must be // either an invoke-* or a filled-new-array instruction. if (opcode::is_a_move_result(op)) { auto prev = it; while (prev != code->begin()) { --prev; if (prev->type == MFLOW_OPCODE) { break; } } if (it == code->begin() || prev->type != MFLOW_OPCODE) { return Result::make_error("Encountered " + show(*it) + " at start of the method"); } auto prev_op = prev->insn->opcode(); if (!(opcode::is_an_invoke(prev_op) || opcode::is_filled_new_array(prev_op))) { return Result::make_error( "Encountered " + show(*it) + " without appropriate prefix " "instruction. Expected invoke or filled-new-array, got " + show(prev->insn)); } } else if (opcode::is_a_move_result_pseudo(insn->opcode()) && (it == code->begin() || !has_move_result_pseudo(*std::prev(it)))) { return Result::make_error("Encountered " + show(*it) + " without appropriate prefix " "instruction"); } else if (insn->has_move_result_pseudo() && (it == code->end() || std::next(it) == code->end() || !is_move_result_pseudo(*std::next(it)))) { return Result::make_error("Did not find move-result-pseudo after " + show(*it) + " in \n" + show(code)); } } return check_uninitialized(method); } /* * Do a linear pass to sanity-check the structure of the positions. */ Result check_positions(const DexMethod* method) { auto code = method->get_code(); std::unordered_set<DexPosition*> positions; for (auto it = code->begin(); it != code->end(); ++it) { if (it->type != MFLOW_POSITION) { continue; } auto pos = it->pos.get(); if (!positions.insert(pos).second) { return Result::make_error("Duplicate position " + show(pos)); } } std::unordered_set<DexPosition*> visited_parents; for (auto pos : positions) { if (!pos->parent) { continue; } if (!positions.count(pos->parent)) { return Result::make_error("Missing parent " + show(pos)); } for (auto p = pos; p; p = p->parent) { if (!visited_parents.insert(p).second) { return Result::make_error("Cyclic parents around " + show(pos)); } } visited_parents.clear(); } return Result::Ok(); } /* * For now, we only check if there are... * - mismatches in the monitor stack depth * - instructions that may throw in a synchronized region in a try-block without * a catch-all. */ Result check_monitors(const DexMethod* method) { auto code = method->get_code(); monitor_count::Analyzer monitor_analyzer(code->cfg()); auto blocks = monitor_analyzer.get_monitor_mismatches(); if (!blocks.empty()) { std::ostringstream out; out << "Monitor-stack mismatch (unverifiable code) in " << method->get_deobfuscated_name_or_empty() << " at blocks "; for (auto b : blocks) { out << "("; for (auto e : b->preds()) { auto count = monitor_analyzer.get_exit_state_at(e->src()); count = monitor_analyzer.analyze_edge(e, count); if (!count.is_bottom()) { out << "B" << e->src()->id() << ":" << show(count) << " | "; } } auto count = monitor_analyzer.get_entry_state_at(b); out << ") ==> B" << b->id() << ":" << show(count) << ", "; } out << " in\n" + show(code->cfg()); return Result::make_error(out.str()); } auto sketchy_insns = monitor_analyzer.get_sketchy_instructions(); std::unordered_set<cfg::Block*> sketchy_blocks; for (auto& it : sketchy_insns) { sketchy_blocks.insert(it.block()); } std20::erase_if(sketchy_blocks, [&](auto* b) { return !code->cfg().get_succ_edge_of_type(b, cfg::EDGE_THROW); }); if (!sketchy_blocks.empty()) { std::ostringstream out; out << "Throwing instructions in a synchronized region in a try-block " "without a catch-all in " << method->get_deobfuscated_name_or_empty(); bool first = true; for (auto& it : sketchy_insns) { if (!sketchy_blocks.count(it.block())) { continue; } if (first) { first = false; } else { out << " and "; } out << " at instruction B" << it.block()->id() << " '" << SHOW(it->insn) << "' @ " << std::hex << static_cast<const void*>(&*it.unwrap()); } out << " in\n" + show(code->cfg()); return Result::make_error(out.str()); } return Result::Ok(); } /** * Validate if the caller has the permit to call a method or access a field. * * +-------------------------+--------+----------+-----------+-------+ * | Access Levels Modifier | Class | Package | Subclass | World | * +-------------------------+--------+----------+-----------+-------+ * | public | Y | Y | Y | Y | * | protected | Y | Y | Y | N | * | no modifier | Y | Y | N | N | * | private | Y | N | N | N | * +-------------------------+--------+----------+-----------+-------+ */ template <typename DexMember> void validate_access(const DexMethod* accessor, const DexMember* accessee) { auto accessor_class = accessor->get_class(); if (accessee == nullptr || is_public(accessee) || accessor_class == accessee->get_class()) { return; } if (!is_private(accessee)) { auto accessee_class = accessee->get_class(); auto from_same_package = type::same_package(accessor_class, accessee_class); if (is_package_private(accessee) && from_same_package) { return; } else if (is_protected(accessee) && (from_same_package || type::check_cast(accessor_class, accessee_class))) { return; } } std::ostringstream out; out << "\nillegal access to " << (is_private(accessee) ? "private " : (is_package_private(accessee) ? "package-private " : "protected ")) << show_deobfuscated(accessee) << "\n from " << show_deobfuscated(accessor); // If the accessee is external, we don't report the error, just log it. // TODO(fengliu): We should enforce the correctness when visiting external dex // members. if (accessee->is_external()) { TRACE(TYPE, 2, "%s", out.str().c_str()); return; } throw TypeCheckingException(out.str()); } void validate_invoke_super(const DexMethodRef* callee) { auto callee_cls = type_class(callee->get_class()); if (!callee_cls || !is_interface(callee_cls)) { return; } std::ostringstream out; out << "\nillegal invoke-super to interface method defined in class " << show_deobfuscated(callee_cls) << "(note that this can happen when external framework SDKs are not " "passed to D8 as a classpath dependency; in such cases D8 may " "silently generate illegal invoke-supers to interface methods)"; throw TypeCheckingException(out.str()); } } // namespace IRTypeChecker::~IRTypeChecker() {} IRTypeChecker::IRTypeChecker(DexMethod* dex_method, bool validate_access, bool validate_invoke_super) : m_dex_method(dex_method), m_validate_access(validate_access), m_validate_invoke_super(validate_invoke_super), m_complete(false), m_verify_moves(false), m_check_no_overwrite_this(false), m_good(true), m_what("OK") {} void IRTypeChecker::run() { IRCode* code = m_dex_method->get_code(); if (m_complete) { // The type checker can only be run once on any given method. return; } if (code == nullptr) { // If the method has no associated code, the type checking trivially // succeeds. m_complete = true; return; } code->build_cfg(/* editable */ false); auto result = check_structure(m_dex_method, m_check_no_overwrite_this); if (result != Result::Ok()) { m_complete = true; m_good = false; m_what = result.error_message(); return; } // We then infer types for all the registers used in the method. const cfg::ControlFlowGraph& cfg = code->cfg(); // Check that the load-params match the signature. auto params_result = check_load_params(m_dex_method); if (params_result != Result::Ok()) { m_complete = true; m_good = false; m_what = params_result.error_message(); return; } m_type_inference = std::make_unique<TypeInference>(cfg); m_type_inference->run(m_dex_method); // Finally, we use the inferred types to type-check each instruction in the // method. We stop at the first type error encountered. auto& type_envs = m_type_inference->get_type_environments(); for (const MethodItemEntry& mie : InstructionIterable(code)) { IRInstruction* insn = mie.insn; try { auto it = type_envs.find(insn); always_assert_log( it != type_envs.end(), "%s in:\n%s", SHOW(mie), SHOW(code)); check_instruction(insn, &it->second); } catch (const TypeCheckingException& e) { m_good = false; std::ostringstream out; out << "Type error in method " << m_dex_method->get_deobfuscated_name_or_empty() << " at instruction '" << SHOW(insn) << "' @ " << std::hex << static_cast<const void*>(&mie) << " for " << e.what(); m_what = out.str(); m_complete = true; return; } } auto positions_result = check_positions(m_dex_method); if (positions_result != Result::Ok()) { m_complete = true; m_good = false; m_what = positions_result.error_message(); return; } auto monitors_result = check_monitors(m_dex_method); if (monitors_result != Result::Ok()) { m_complete = true; m_good = false; m_what = monitors_result.error_message(); return; } m_complete = true; if (traceEnabled(TYPE, 9)) { std::ostringstream out; m_type_inference->print(out); TRACE(TYPE, 9, "%s", out.str().c_str()); } } void IRTypeChecker::assume_scalar(TypeEnvironment* state, reg_t reg, bool in_move) const { assume_type(state, reg, /* expected */ SCALAR, /* ignore_top */ in_move && !m_verify_moves); } void IRTypeChecker::assume_reference(TypeEnvironment* state, reg_t reg, bool in_move) const { assume_type(state, reg, /* expected */ REFERENCE, /* ignore_top */ in_move && !m_verify_moves); } void IRTypeChecker::assume_assignable(boost::optional<const DexType*> from, DexType* to) const { // There are some cases in type inference where we have to give up // and claim we don't know anything about a dex type. See // IRTypeCheckerTest.joinCommonBaseWithConflictingInterface, for // example - the last invoke of 'base.foo()' after the blocks join - // we no longer know anything about the type of the reference. It's // in such a case as that that we have to bail out here when the from // optional is empty. if (from && !check_is_assignable_from(*from, to, false)) { std::ostringstream out; out << ": " << *from << " is not assignable to " << to << std::endl; throw TypeCheckingException(out.str()); } } // This method performs type checking only: the type environment is not updated // and the source registers of the instruction are checked against their // expected types. // // Similarly, the various assume_* functions used throughout the code to check // that the inferred type of a register matches with its expected type, as // derived from the context. void IRTypeChecker::check_instruction(IRInstruction* insn, TypeEnvironment* current_state) const { switch (insn->opcode()) { case IOPCODE_LOAD_PARAM: case IOPCODE_LOAD_PARAM_OBJECT: case IOPCODE_LOAD_PARAM_WIDE: { // IOPCODE_LOAD_PARAM_* instructions have been processed before the // analysis. break; } case OPCODE_NOP: { break; } case OPCODE_MOVE: { assume_scalar(current_state, insn->src(0), /* in_move */ true); break; } case OPCODE_MOVE_OBJECT: { assume_reference(current_state, insn->src(0), /* in_move */ true); break; } case OPCODE_MOVE_WIDE: { assume_wide_scalar(current_state, insn->src(0)); break; } case IOPCODE_MOVE_RESULT_PSEUDO: case OPCODE_MOVE_RESULT: { assume_scalar(current_state, RESULT_REGISTER); break; } case IOPCODE_MOVE_RESULT_PSEUDO_OBJECT: case OPCODE_MOVE_RESULT_OBJECT: { assume_reference(current_state, RESULT_REGISTER); break; } case IOPCODE_MOVE_RESULT_PSEUDO_WIDE: case OPCODE_MOVE_RESULT_WIDE: { assume_wide_scalar(current_state, RESULT_REGISTER); break; } case OPCODE_MOVE_EXCEPTION: { // We don't know where to grab the type of the just-caught exception. // Simply set to j.l.Throwable here. break; } case OPCODE_RETURN_VOID: { break; } case OPCODE_RETURN: { assume_scalar(current_state, insn->src(0)); break; } case OPCODE_RETURN_WIDE: { assume_wide_scalar(current_state, insn->src(0)); break; } case OPCODE_RETURN_OBJECT: { assume_reference(current_state, insn->src(0)); auto dtype = current_state->get_dex_type(insn->src(0)); auto rtype = m_dex_method->get_proto()->get_rtype(); // If the inferred type is a fallback, there's no point performing the // accurate type assignment checking. if (dtype && !is_inference_fallback_type(*dtype)) { // Return type checking is non-strict: it is allowed to return any // reference type when `rtype` is an interface. if (!check_is_assignable_from(*dtype, rtype, /*strict=*/false)) { std::ostringstream out; out << "Returning " << dtype << ", but expected from declaration " << rtype << std::endl; throw TypeCheckingException(out.str()); } } break; } case OPCODE_CONST: { break; } case OPCODE_CONST_WIDE: { break; } case OPCODE_CONST_STRING: { break; } case OPCODE_CONST_CLASS: { break; } case OPCODE_MONITOR_ENTER: case OPCODE_MONITOR_EXIT: { assume_reference(current_state, insn->src(0)); break; } case OPCODE_CHECK_CAST: { assume_reference(current_state, insn->src(0)); break; } case OPCODE_INSTANCE_OF: case OPCODE_ARRAY_LENGTH: { assume_reference(current_state, insn->src(0)); break; } case OPCODE_NEW_INSTANCE: { break; } case OPCODE_NEW_ARRAY: { assume_integer(current_state, insn->src(0)); break; } case OPCODE_FILLED_NEW_ARRAY: { const DexType* element_type = type::get_array_component_type(insn->get_type()); // We assume that structural constraints on the bytecode are satisfied, // i.e., the type is indeed an array type. always_assert(element_type != nullptr); bool is_array_of_references = type::is_object(element_type); for (size_t i = 0; i < insn->srcs_size(); ++i) { if (is_array_of_references) { assume_reference(current_state, insn->src(i)); } else { assume_scalar(current_state, insn->src(i)); } } break; } case OPCODE_FILL_ARRAY_DATA: { break; } case OPCODE_THROW: { assume_reference(current_state, insn->src(0)); break; } case OPCODE_GOTO: { break; } case OPCODE_SWITCH: { assume_integer(current_state, insn->src(0)); break; } case OPCODE_CMPL_FLOAT: case OPCODE_CMPG_FLOAT: { assume_float(current_state, insn->src(0)); assume_float(current_state, insn->src(1)); break; } case OPCODE_CMPL_DOUBLE: case OPCODE_CMPG_DOUBLE: { assume_double(current_state, insn->src(0)); assume_double(current_state, insn->src(1)); break; } case OPCODE_CMP_LONG: { assume_long(current_state, insn->src(0)); assume_long(current_state, insn->src(1)); break; } case OPCODE_IF_EQ: case OPCODE_IF_NE: { assume_comparable(current_state, insn->src(0), insn->src(1)); break; } case OPCODE_IF_LT: case OPCODE_IF_GE: case OPCODE_IF_GT: case OPCODE_IF_LE: { assume_integer(current_state, insn->src(0)); assume_integer(current_state, insn->src(1)); break; } case OPCODE_IF_EQZ: case OPCODE_IF_NEZ: { assume_comparable_with_zero(current_state, insn->src(0)); break; } case OPCODE_IF_LTZ: case OPCODE_IF_GEZ: case OPCODE_IF_GTZ: case OPCODE_IF_LEZ: { assume_integer(current_state, insn->src(0)); break; } case OPCODE_AGET: { assume_reference(current_state, insn->src(0)); assume_integer(current_state, insn->src(1)); break; } case OPCODE_AGET_BOOLEAN: case OPCODE_AGET_BYTE: case OPCODE_AGET_CHAR: case OPCODE_AGET_SHORT: { assume_reference(current_state, insn->src(0)); assume_integer(current_state, insn->src(1)); break; } case OPCODE_AGET_WIDE: { assume_reference(current_state, insn->src(0)); assume_integer(current_state, insn->src(1)); break; } case OPCODE_AGET_OBJECT: { assume_reference(current_state, insn->src(0)); assume_integer(current_state, insn->src(1)); break; } case OPCODE_APUT: { assume_scalar(current_state, insn->src(0)); assume_reference(current_state, insn->src(1)); assume_integer(current_state, insn->src(2)); break; } case OPCODE_APUT_BOOLEAN: case OPCODE_APUT_BYTE: case OPCODE_APUT_CHAR: case OPCODE_APUT_SHORT: { assume_integer(current_state, insn->src(0)); assume_reference(current_state, insn->src(1)); assume_integer(current_state, insn->src(2)); break; } case OPCODE_APUT_WIDE: { assume_wide_scalar(current_state, insn->src(0)); assume_reference(current_state, insn->src(1)); assume_integer(current_state, insn->src(2)); break; } case OPCODE_APUT_OBJECT: { assume_reference(current_state, insn->src(0)); assume_reference(current_state, insn->src(1)); assume_integer(current_state, insn->src(2)); break; } case OPCODE_IGET: { assume_reference(current_state, insn->src(0)); const auto f_cls = insn->get_field()->get_class(); assume_assignable(current_state->get_dex_type(insn->src(0)), f_cls); break; } case OPCODE_IGET_BOOLEAN: case OPCODE_IGET_BYTE: case OPCODE_IGET_CHAR: case OPCODE_IGET_SHORT: case OPCODE_IGET_WIDE: { assume_reference(current_state, insn->src(0)); const auto f_cls = insn->get_field()->get_class(); assume_assignable(current_state->get_dex_type(insn->src(0)), f_cls); break; } case OPCODE_IGET_OBJECT: { assume_reference(current_state, insn->src(0)); always_assert(insn->has_field()); const auto f_cls = insn->get_field()->get_class(); assume_assignable(current_state->get_dex_type(insn->src(0)), f_cls); break; } case OPCODE_IPUT: { const DexType* type = insn->get_field()->get_type(); if (type::is_float(type)) { assume_float(current_state, insn->src(0)); } else { assume_integer(current_state, insn->src(0)); } assume_reference(current_state, insn->src(1)); const auto f_cls = insn->get_field()->get_class(); assume_assignable(current_state->get_dex_type(insn->src(1)), f_cls); break; } case OPCODE_IPUT_BOOLEAN: case OPCODE_IPUT_BYTE: case OPCODE_IPUT_CHAR: case OPCODE_IPUT_SHORT: { assume_integer(current_state, insn->src(0)); assume_reference(current_state, insn->src(1)); const auto f_cls = insn->get_field()->get_class(); assume_assignable(current_state->get_dex_type(insn->src(1)), f_cls); break; } case OPCODE_IPUT_WIDE: { assume_wide_scalar(current_state, insn->src(0)); assume_reference(current_state, insn->src(1)); const auto f_cls = insn->get_field()->get_class(); assume_assignable(current_state->get_dex_type(insn->src(1)), f_cls); break; } case OPCODE_IPUT_OBJECT: { assume_reference(current_state, insn->src(0)); assume_reference(current_state, insn->src(1)); always_assert(insn->has_field()); const auto f_type = insn->get_field()->get_type(); assume_assignable(current_state->get_dex_type(insn->src(0)), f_type); const auto f_cls = insn->get_field()->get_class(); assume_assignable(current_state->get_dex_type(insn->src(1)), f_cls); break; } case OPCODE_SGET: { break; } case OPCODE_SGET_BOOLEAN: case OPCODE_SGET_BYTE: case OPCODE_SGET_CHAR: case OPCODE_SGET_SHORT: { break; } case OPCODE_SGET_WIDE: { break; } case OPCODE_SGET_OBJECT: { break; } case OPCODE_SPUT: { const DexType* type = insn->get_field()->get_type(); if (type::is_float(type)) { assume_float(current_state, insn->src(0)); } else { assume_integer(current_state, insn->src(0)); } break; } case OPCODE_SPUT_BOOLEAN: case OPCODE_SPUT_BYTE: case OPCODE_SPUT_CHAR: case OPCODE_SPUT_SHORT: { assume_integer(current_state, insn->src(0)); break; } case OPCODE_SPUT_WIDE: { assume_wide_scalar(current_state, insn->src(0)); break; } case OPCODE_SPUT_OBJECT: { assume_reference(current_state, insn->src(0)); break; } case OPCODE_INVOKE_CUSTOM: case OPCODE_INVOKE_POLYMORPHIC: case OPCODE_INVOKE_VIRTUAL: case OPCODE_INVOKE_SUPER: case OPCODE_INVOKE_DIRECT: case OPCODE_INVOKE_STATIC: case OPCODE_INVOKE_INTERFACE: { DexMethodRef* dex_method = insn->get_method(); const auto* arg_types = dex_method->get_proto()->get_args(); size_t expected_args = (insn->opcode() != OPCODE_INVOKE_STATIC ? 1 : 0) + arg_types->size(); if (insn->srcs_size() != expected_args) { std::ostringstream out; out << SHOW(insn) << ": argument count mismatch; " << "expected " << expected_args << ", " << "but found " << insn->srcs_size() << " instead"; throw TypeCheckingException(out.str()); } size_t src_idx{0}; if (insn->opcode() != OPCODE_INVOKE_STATIC) { // The first argument is a reference to the object instance on which the // method is invoked. auto src = insn->src(src_idx++); assume_reference(current_state, src); assume_assignable(current_state->get_dex_type(src), dex_method->get_class()); } for (DexType* arg_type : *arg_types) { if (type::is_object(arg_type)) { auto src = insn->src(src_idx++); assume_reference(current_state, src); assume_assignable(current_state->get_dex_type(src), arg_type); continue; } if (type::is_integer(arg_type)) { assume_integer(current_state, insn->src(src_idx++)); continue; } if (type::is_long(arg_type)) { assume_long(current_state, insn->src(src_idx++)); continue; } if (type::is_float(arg_type)) { assume_float(current_state, insn->src(src_idx++)); continue; } always_assert(type::is_double(arg_type)); assume_double(current_state, insn->src(src_idx++)); } if (m_validate_access) { auto resolved = resolve_method(dex_method, opcode_to_search(insn), m_dex_method); ::validate_access(m_dex_method, resolved); } if (m_validate_invoke_super && insn->opcode() == OPCODE_INVOKE_SUPER) { validate_invoke_super(dex_method); } break; } case OPCODE_NEG_INT: case OPCODE_NOT_INT: { assume_integer(current_state, insn->src(0)); break; } case OPCODE_NEG_LONG: case OPCODE_NOT_LONG: { assume_long(current_state, insn->src(0)); break; } case OPCODE_NEG_FLOAT: { assume_float(current_state, insn->src(0)); break; } case OPCODE_NEG_DOUBLE: { assume_double(current_state, insn->src(0)); break; } case OPCODE_INT_TO_BYTE: case OPCODE_INT_TO_CHAR: case OPCODE_INT_TO_SHORT: { assume_integer(current_state, insn->src(0)); break; } case OPCODE_LONG_TO_INT: { assume_long(current_state, insn->src(0)); break; } case OPCODE_FLOAT_TO_INT: { assume_float(current_state, insn->src(0)); break; } case OPCODE_DOUBLE_TO_INT: { assume_double(current_state, insn->src(0)); break; } case OPCODE_INT_TO_LONG: { assume_integer(current_state, insn->src(0)); break; } case OPCODE_FLOAT_TO_LONG: { assume_float(current_state, insn->src(0)); break; } case OPCODE_DOUBLE_TO_LONG: { assume_double(current_state, insn->src(0)); break; } case OPCODE_INT_TO_FLOAT: { assume_integer(current_state, insn->src(0)); break; } case OPCODE_LONG_TO_FLOAT: { assume_long(current_state, insn->src(0)); break; } case OPCODE_DOUBLE_TO_FLOAT: { assume_double(current_state, insn->src(0)); break; } case OPCODE_INT_TO_DOUBLE: { assume_integer(current_state, insn->src(0)); break; } case OPCODE_LONG_TO_DOUBLE: { assume_long(current_state, insn->src(0)); break; } case OPCODE_FLOAT_TO_DOUBLE: { assume_float(current_state, insn->src(0)); break; } case OPCODE_ADD_INT: case OPCODE_SUB_INT: case OPCODE_MUL_INT: case OPCODE_AND_INT: case OPCODE_OR_INT: case OPCODE_XOR_INT: case OPCODE_SHL_INT: case OPCODE_SHR_INT: case OPCODE_USHR_INT: { assume_integer(current_state, insn->src(0)); assume_integer(current_state, insn->src(1)); break; } case OPCODE_DIV_INT: case OPCODE_REM_INT: { assume_integer(current_state, insn->src(0)); assume_integer(current_state, insn->src(1)); break; } case OPCODE_ADD_LONG: case OPCODE_SUB_LONG: case OPCODE_MUL_LONG: case OPCODE_AND_LONG: case OPCODE_OR_LONG: case OPCODE_XOR_LONG: { assume_long(current_state, insn->src(0)); assume_long(current_state, insn->src(1)); break; } case OPCODE_DIV_LONG: case OPCODE_REM_LONG: { assume_long(current_state, insn->src(0)); assume_long(current_state, insn->src(1)); break; } case OPCODE_SHL_LONG: case OPCODE_SHR_LONG: case OPCODE_USHR_LONG: { assume_long(current_state, insn->src(0)); assume_integer(current_state, insn->src(1)); break; } case OPCODE_ADD_FLOAT: case OPCODE_SUB_FLOAT: case OPCODE_MUL_FLOAT: case OPCODE_DIV_FLOAT: case OPCODE_REM_FLOAT: { assume_float(current_state, insn->src(0)); assume_float(current_state, insn->src(1)); break; } case OPCODE_ADD_DOUBLE: case OPCODE_SUB_DOUBLE: case OPCODE_MUL_DOUBLE: case OPCODE_DIV_DOUBLE: case OPCODE_REM_DOUBLE: { assume_double(current_state, insn->src(0)); assume_double(current_state, insn->src(1)); break; } case OPCODE_ADD_INT_LIT16: case OPCODE_RSUB_INT: case OPCODE_MUL_INT_LIT16: case OPCODE_AND_INT_LIT16: case OPCODE_OR_INT_LIT16: case OPCODE_XOR_INT_LIT16: case OPCODE_ADD_INT_LIT8: case OPCODE_RSUB_INT_LIT8: case OPCODE_MUL_INT_LIT8: case OPCODE_AND_INT_LIT8: case OPCODE_OR_INT_LIT8: case OPCODE_XOR_INT_LIT8: case OPCODE_SHL_INT_LIT8: case OPCODE_SHR_INT_LIT8: case OPCODE_USHR_INT_LIT8: { assume_integer(current_state, insn->src(0)); break; } case OPCODE_DIV_INT_LIT16: case OPCODE_REM_INT_LIT16: case OPCODE_DIV_INT_LIT8: case OPCODE_REM_INT_LIT8: { assume_integer(current_state, insn->src(0)); break; } case IOPCODE_INIT_CLASS: { break; } } if (insn->has_field() && m_validate_access) { auto search = opcode::is_an_sfield_op(insn->opcode()) ? FieldSearch::Static : FieldSearch::Instance; auto resolved = resolve_field(insn->get_field(), search); ::validate_access(m_dex_method, resolved); } } IRType IRTypeChecker::get_type(IRInstruction* insn, reg_t reg) const { check_completion(); auto& type_envs = m_type_inference->get_type_environments(); auto it = type_envs.find(insn); if (it == type_envs.end()) { // The instruction doesn't belong to this method. We treat this as // unreachable code and return BOTTOM. return BOTTOM; } return it->second.get_type(reg).element(); } boost::optional<const DexType*> IRTypeChecker::get_dex_type(IRInstruction* insn, reg_t reg) const { check_completion(); auto& type_envs = m_type_inference->get_type_environments(); auto it = type_envs.find(insn); if (it == type_envs.end()) { // The instruction doesn't belong to this method. We treat this as // unreachable code and return BOTTOM. return nullptr; } return it->second.get_dex_type(reg); } std::ostream& operator<<(std::ostream& output, const IRTypeChecker& checker) { checker.m_type_inference->print(output); return output; } void IRTypeChecker::check_completion() const { always_assert_log(m_complete, "The type checker did not run on method %s.\n", m_dex_method->get_deobfuscated_name_or_empty().c_str()); } std::string IRTypeChecker::dump_annotated_cfg(DexMethod* method) const { cfg::ScopedCFG cfg{method->get_code()}; TypeInference inf{method->get_code()->cfg()}; inf.run(m_dex_method); return show_analysis<TypeEnvironment>(method->get_code()->cfg(), inf); } std::string IRTypeChecker::dump_annotated_cfg_reduced(DexMethod* method) const { cfg::ScopedCFG cfg{method->get_code()}; TypeInference inf{method->get_code()->cfg()}; inf.run(m_dex_method); struct TypeInferenceReducedSpecial { TypeEnvironment cur; const TypeInference& iter; explicit TypeInferenceReducedSpecial(const TypeInference& iter) : iter(iter) {} void add_reg(std::ostream& os, reg_t r) const { os << " v" << r << "="; auto type = cur.get_type(r); os << type << "/"; auto dtype = cur.get_dex_type(r); if (dtype) { os << show(*dtype); } else { os << "T"; } } void mie_before(std::ostream& os, const MethodItemEntry& mie) {} void mie_after(std::ostream& os, const MethodItemEntry& mie) { if (mie.type != MFLOW_OPCODE) { return; } // Find inputs. if (mie.insn->srcs_size() != 0) { os << " inputs:"; for (reg_t r : mie.insn->srcs()) { add_reg(os, r); } os << "\n"; } iter.analyze_instruction(mie.insn, &cur); cur.reduce(); // Find outputs. if (mie.insn->has_dest()) { os << " output:"; add_reg(os, mie.insn->dest()); os << "\n"; } } void start_block(std::ostream& os, cfg::Block* b) { cur = iter.get_entry_state_at(b); os << "entry state: " << cur << "\n"; } void end_block(std::ostream& os, cfg::Block* b) {} }; TypeInferenceReducedSpecial special(inf); return show(method->get_code()->cfg(), special); }