libredex/PointsToSemantics.cpp (1,342 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include "PointsToSemantics.h" #include <algorithm> #include <cstdint> #include <deque> #include <fstream> #include <iomanip> #include <iterator> #include <limits> #include <memory> #include <ostream> #include <sstream> #include <tuple> #include <unordered_map> #include <unordered_set> #include <utility> #include <boost/container/flat_set.hpp> #include <boost/functional/hash.hpp> #include <boost/functional/hash_fwd.hpp> #include <boost/optional.hpp> #include "BaseIRAnalyzer.h" #include "ControlFlow.h" #include "Debug.h" #include "DexAccess.h" #include "DexClass.h" #include "DexUtil.h" #include "IRCode.h" #include "IRInstruction.h" #include "IROpcode.h" #include "Macros.h" #include "PatriciaTreeMapAbstractEnvironment.h" #include "PatriciaTreeSetAbstractDomain.h" #include "PointsToSemanticsUtils.h" #include "RedexContext.h" #include "Show.h" #include "Trace.h" #include "Walkers.h" using namespace sparta; s_expr PointsToVariable::to_s_expr() const { return s_expr({s_expr("V"), s_expr(m_id)}); } boost::optional<PointsToVariable> PointsToVariable::from_s_expr( const s_expr& e) { int32_t id; if (!s_patn({s_patn("V"), s_patn(&id)}).match_with(e)) { return {}; } return {PointsToVariable(id)}; } size_t hash_value(const PointsToVariable& v) { boost::hash<int32_t> hasher; return hasher(v.m_id); } bool operator==(const PointsToVariable& v, const PointsToVariable& w) { return v.m_id == w.m_id; } bool operator<(const PointsToVariable& v, const PointsToVariable& w) { return v.m_id < w.m_id; } std::ostream& operator<<(std::ostream& o, const PointsToVariable& v) { if (v.m_id == PointsToVariable::null_var_id()) { o << "NULL"; } else if (v.m_id == PointsToVariable::this_var_id()) { o << "THIS"; } else { o << "V" << v.m_id; } return o; } namespace pts_impl { /* clang-format off */ #define OP_STRING_TABLE \ { \ OP_STRING(PTS_CONST_STRING), \ OP_STRING(PTS_CONST_CLASS), \ OP_STRING(PTS_NEW_OBJECT), \ OP_STRING(PTS_GET_CLASS), \ OP_STRING(PTS_CHECK_CAST), \ OP_STRING(PTS_GET_EXCEPTION), \ OP_STRING(PTS_LOAD_PARAM), \ OP_STRING(PTS_RETURN), \ OP_STRING(PTS_DISJUNCTION), \ OP_STRING(PTS_IGET), \ OP_STRING(PTS_SGET), \ OP_STRING(PTS_IPUT), \ OP_STRING(PTS_SPUT), \ OP_STRING(PTS_IGET_SPECIAL), \ OP_STRING(PTS_IPUT_SPECIAL), \ OP_STRING(PTS_INVOKE_VIRTUAL), \ OP_STRING(PTS_INVOKE_SUPER), \ OP_STRING(PTS_INVOKE_DIRECT), \ OP_STRING(PTS_INVOKE_INTERFACE), \ OP_STRING(PTS_INVOKE_STATIC), \ } \ #define OP_STRING(X) {X, #X} std::unordered_map<PointsToOperationKind, std::string, std::hash<int>> op_to_string_table = OP_STRING_TABLE; #undef OP_STRING #define OP_STRING(X) {#X, X} std::unordered_map<std::string, PointsToOperationKind> string_to_op_table = OP_STRING_TABLE; #undef OP_STRING /* clang-format on */ s_expr op_kind_to_s_expr(PointsToOperationKind kind) { auto it = op_to_string_table.find(kind); always_assert(it != op_to_string_table.end()); return s_expr(it->second); } boost::optional<PointsToOperationKind> string_to_op_kind( const std::string& str) { auto it = string_to_op_table.find(str); if (it == string_to_op_table.end()) { return {}; } return boost::optional<PointsToOperationKind>(it->second); } s_expr special_edge_to_s_expr(SpecialPointsToEdge edge) { switch (edge) { case PTS_ARRAY_ELEMENT: { return s_expr("PTS_ARRAY_ELEMENT"); } } } boost::optional<SpecialPointsToEdge> string_to_special_edge( const std::string& str) { if (str == "PTS_ARRAY_ELEMENT") { return {PTS_ARRAY_ELEMENT}; } return {}; } s_expr dex_method_to_s_expr(DexMethodRef* dex_method) { DexProto* proto = dex_method->get_proto(); std::vector<s_expr> signature; for (DexType* arg : *proto->get_args()) { signature.push_back(s_expr(arg->get_name()->str())); } return s_expr({s_expr(dex_method->get_class()->get_name()->str()), s_expr(dex_method->get_name()->str()), s_expr(proto->get_rtype()->get_name()->str()), s_expr(signature)}); } boost::optional<DexMethodRef*> s_expr_to_dex_method(const s_expr& e) { std::string type_str; std::string name_str; std::string rtype_str; s_expr signature; if (!s_patn({s_patn(&type_str), s_patn(&name_str), s_patn(&rtype_str), s_patn({}, signature)}) .match_with(e)) { return {}; } DexTypeList::ContainerType types; for (size_t arg = 0; arg < signature.size(); ++arg) { if (!signature[arg].is_string()) { return {}; } types.push_back(DexType::make_type(signature[arg].get_string().c_str())); } return {DexMethod::make_method( DexType::make_type(type_str.c_str()), DexString::make_string(name_str), DexProto::make_proto(DexType::make_type(rtype_str.c_str()), DexTypeList::make_type_list(std::move(types))))}; } } // namespace pts_impl s_expr PointsToOperation::to_s_expr() const { using namespace pts_impl; switch (kind) { case PTS_CONST_STRING: { return s_expr({op_kind_to_s_expr(kind), s_expr(dex_string->str())}); } case PTS_CONST_CLASS: case PTS_NEW_OBJECT: case PTS_CHECK_CAST: { return s_expr( {op_kind_to_s_expr(kind), s_expr(dex_type->get_name()->str())}); } case PTS_GET_EXCEPTION: case PTS_GET_CLASS: case PTS_RETURN: case PTS_DISJUNCTION: { return s_expr({op_kind_to_s_expr(kind)}); } case PTS_LOAD_PARAM: { return s_expr( {op_kind_to_s_expr(kind), s_expr(static_cast<int32_t>(parameter))}); } case PTS_IGET: case PTS_SGET: case PTS_IPUT: case PTS_SPUT: { return s_expr({op_kind_to_s_expr(kind), s_expr(dex_field->get_class()->get_name()->str()), s_expr(dex_field->get_name()->str()), s_expr(dex_field->get_type()->get_name()->str())}); } case PTS_IGET_SPECIAL: case PTS_IPUT_SPECIAL: { return s_expr( {op_kind_to_s_expr(kind), special_edge_to_s_expr(special_edge)}); } case PTS_INVOKE_VIRTUAL: case PTS_INVOKE_SUPER: case PTS_INVOKE_DIRECT: case PTS_INVOKE_INTERFACE: case PTS_INVOKE_STATIC: { return s_expr({op_kind_to_s_expr(kind), dex_method_to_s_expr(dex_method)}); } } } boost::optional<PointsToOperation> PointsToOperation::from_s_expr( const s_expr& e) { using namespace pts_impl; std::string op_kind_str; s_expr args; if (!s_patn({s_patn(&op_kind_str)}, args).match_with(e)) { return {}; } auto op_kind_opt = string_to_op_kind(op_kind_str); if (!op_kind_opt) { return {}; } PointsToOperationKind op_kind = *op_kind_opt; switch (op_kind) { case PTS_CONST_STRING: { std::string dex_string_str; if (!s_patn({s_patn(&dex_string_str)}).match_with(args)) { return {}; } return {PointsToOperation(op_kind, DexString::make_string(dex_string_str))}; } case PTS_CONST_CLASS: case PTS_NEW_OBJECT: case PTS_CHECK_CAST: { std::string dex_type_str; if (!s_patn({s_patn(&dex_type_str)}).match_with(args)) { return {}; } return { PointsToOperation(op_kind, DexType::make_type(dex_type_str.c_str()))}; } case PTS_GET_EXCEPTION: case PTS_GET_CLASS: case PTS_RETURN: case PTS_DISJUNCTION: { return {PointsToOperation(op_kind)}; } case PTS_LOAD_PARAM: { int32_t parameter; if (!s_patn({s_patn(&parameter)}).match_with(args)) { return {}; } return {PointsToOperation(op_kind, static_cast<size_t>(parameter))}; } case PTS_IGET: case PTS_SGET: case PTS_IPUT: case PTS_SPUT: { std::string container_str; std::string name_str; std::string type_str; if (!s_patn({s_patn(&container_str), s_patn(&name_str), s_patn(&type_str)}) .match_with(args)) { return {}; } return {PointsToOperation( op_kind, DexField::make_field(DexType::make_type(container_str.c_str()), DexString::make_string(name_str), DexType::make_type(type_str.c_str())))}; } case PTS_IGET_SPECIAL: case PTS_IPUT_SPECIAL: { std::string edge_str; if (!s_patn({s_patn(&edge_str)}).match_with(args)) { return {}; } auto edge = string_to_special_edge(edge_str); if (!edge) { return {}; } return {PointsToOperation(op_kind, *edge)}; } case PTS_INVOKE_VIRTUAL: case PTS_INVOKE_SUPER: case PTS_INVOKE_DIRECT: case PTS_INVOKE_INTERFACE: case PTS_INVOKE_STATIC: { s_expr dex_method_expr; if (!s_patn({s_patn(dex_method_expr)}).match_with(args)) { return {}; } auto dex_method_opt = s_expr_to_dex_method(dex_method_expr); if (!dex_method_opt) { return {}; } return {PointsToOperation(op_kind, *dex_method_opt)}; } } } namespace pts_impl { // A wrapper for a set of variables. We use this structure for the generation of // disjunctions. class PointsToVariableSet { public: struct Hash { size_t operator()(const PointsToVariableSet& s) const { return boost::hash_range(s.m_set.begin(), s.m_set.end()); } }; struct EqualTo { bool operator()(const PointsToVariableSet& s1, const PointsToVariableSet& s2) const { return s1.m_set == s2.m_set; } }; using iterator = boost::container::flat_set<PointsToVariable>::const_iterator; PointsToVariableSet() = default; template <typename InputIterator> PointsToVariableSet(InputIterator begin, InputIterator end) : m_set(begin, end) {} void insert(const PointsToVariable& v) { m_set.insert(v); } iterator begin() const { return m_set.begin(); } iterator end() const { return m_set.end(); } private: // Sets of variables correspond to sets of anchors and are generally small // (most of the time, just a singleton). We use a flat set (i.e., an ordered // associative array) in order to optimize memory usage. boost::container::flat_set<PointsToVariable> m_set; }; } // namespace pts_impl std::vector<std::pair<size_t, PointsToVariable>> PointsToAction::get_arguments() const { always_assert(m_operation.is_invoke() || m_operation.is_disjunction()); std::vector<std::pair<size_t, PointsToVariable>> args; for (const auto& binding : m_arguments) { if (binding.first >= 0) { // We filter out special arguments (like the destination variable), which // all have a negative index. args.push_back(binding); } } return args; } PointsToAction PointsToAction::load_operation( const PointsToOperation& operation, PointsToVariable dest) { always_assert(operation.is_load()); return PointsToAction(operation, {{dest_key(), dest}}); } PointsToAction PointsToAction::get_class_operation(PointsToVariable dest, PointsToVariable src) { return PointsToAction(PointsToOperation(PTS_GET_CLASS), {{dest_key(), dest}, {src_key(), src}}); } PointsToAction PointsToAction::check_cast_operation(DexType* dex_type, PointsToVariable dest, PointsToVariable src) { return PointsToAction(PointsToOperation(PTS_CHECK_CAST, dex_type), {{dest_key(), dest}, {src_key(), src}}); } PointsToAction PointsToAction::get_operation( const PointsToOperation& operation, PointsToVariable dest, boost::optional<PointsToVariable> instance) { always_assert(operation.is_get()); always_assert(!(instance && operation.kind == PTS_SPUT)); if (instance) { return PointsToAction(operation, {{dest_key(), dest}, {instance_key(), *instance}}); } else { return PointsToAction(operation, {{dest_key(), dest}}); } } PointsToAction PointsToAction::put_operation( const PointsToOperation& operation, PointsToVariable rhs, boost::optional<PointsToVariable> lhs) { always_assert(operation.is_put()); always_assert(!(lhs && operation.kind == PTS_SPUT)); if (lhs) { return PointsToAction(operation, {{lhs_key(), *lhs}, {rhs_key(), rhs}}); } else { return PointsToAction(operation, {{rhs_key(), rhs}}); } } PointsToAction PointsToAction::invoke_operation( const PointsToOperation& operation, boost::optional<PointsToVariable> dest, boost::optional<PointsToVariable> instance, const std::vector<std::pair<int32_t, PointsToVariable>>& args) { always_assert(operation.is_invoke()); always_assert(!(instance && operation.kind == PTS_INVOKE_STATIC)); std::vector<std::pair<int32_t, PointsToVariable>> bindings; bindings.reserve(args.size() + 2); if (dest) { bindings.push_back({dest_key(), *dest}); } if (instance) { bindings.push_back({instance_key(), *instance}); } bindings.insert(bindings.end(), args.begin(), args.end()); return PointsToAction(operation, bindings); } PointsToAction PointsToAction::return_operation(PointsToVariable src) { return PointsToAction(PointsToOperation(PTS_RETURN), {{src_key(), src}}); } template <typename InputIterator> PointsToAction PointsToAction::disjunction(PointsToVariable dest, InputIterator first, InputIterator last) { pts_impl::PointsToVariableSet vars(first, last); std::vector<std::pair<int32_t, PointsToVariable>> args; int32_t arg = 0; for (const auto& var : vars) { args.push_back({arg++, var}); } args.push_back({dest_key(), dest}); return PointsToAction(PointsToOperation(PTS_DISJUNCTION), args); } PointsToAction::PointsToAction( const PointsToOperation& operation, const std::vector<std::pair<int32_t, PointsToVariable>>& arguments) : m_operation(operation) { for (const auto& binding : arguments) { auto status = m_arguments.insert(binding); // Making sure that there's no duplicate binding in the argument list. always_assert(status.second); } m_arguments.shrink_to_fit(); } PointsToVariable PointsToAction::get_arg(int32_t key) const { auto it = m_arguments.find(key); always_assert(it != m_arguments.end()); return it->second; } s_expr PointsToAction::to_s_expr() const { std::vector<s_expr> args; for (const auto& arg : m_arguments) { args.push_back(s_expr({s_expr(arg.first), arg.second.to_s_expr()})); } return s_expr({m_operation.to_s_expr(), s_expr(args)}); } boost::optional<PointsToAction> PointsToAction::from_s_expr(const s_expr& e) { s_expr operation; s_expr args; if (!s_patn({s_patn(operation), s_patn({}, args)}).match_with(e)) { return {}; } auto operation_opt = PointsToOperation::from_s_expr(operation); if (!operation_opt) { return {}; } std::vector<std::pair<int32_t, PointsToVariable>> arguments; for (size_t i = 0; i < args.size(); ++i) { int32_t arg; s_expr var; if (!s_patn({s_patn(&arg), s_patn(var)}).match_with(args[i])) { return {}; } auto var_opt = PointsToVariable::from_s_expr(var); if (!var_opt) { return {}; } arguments.push_back({arg, *var_opt}); } return {PointsToAction(*operation_opt, arguments)}; } namespace pts_impl { std::string special_edge_to_string(SpecialPointsToEdge e) { switch (e) { case PTS_ARRAY_ELEMENT: { return "ARRAY_ELEM"; } default: not_reached(); } } } // namespace pts_impl std::ostream& operator<<(std::ostream& o, const PointsToAction& a) { const PointsToOperation& op = a.operation(); switch (op.kind) { case PTS_CONST_STRING: { o << a.dest() << " = " << std::quoted(op.dex_string->str()); break; } case PTS_CONST_CLASS: { o << a.dest() << " = CLASS<" << op.dex_type->get_name()->str() << ">"; break; } case PTS_GET_EXCEPTION: { o << a.dest() << " = EXCEPTION"; break; } case PTS_NEW_OBJECT: { o << a.dest() << " = NEW " << op.dex_type->get_name()->str(); break; } case PTS_LOAD_PARAM: { o << a.dest() << " = PARAM " << op.parameter; break; } case PTS_GET_CLASS: { o << a.dest() << " = GET_CLASS(" << a.src() << ")"; break; } case PTS_CHECK_CAST: { o << a.dest() << " = CAST<" << op.dex_type->get_name()->str() << ">(" << a.src() << ")"; break; } case PTS_IGET: { o << a.dest() << " = " << a.instance() << "." << op.dex_field->get_class()->get_name()->str() << "#" << op.dex_field->get_name()->str(); break; } case PTS_IGET_SPECIAL: { o << a.dest() << " = " << pts_impl::special_edge_to_string(op.special_edge) << "(" << a.instance() << ")"; break; } case PTS_SGET: { o << a.dest() << " = " << op.dex_field->get_class()->get_name()->str() << "#" << op.dex_field->get_name()->str(); break; } case PTS_IPUT: { o << a.lhs() << "." << op.dex_field->get_class()->get_name()->str() << "#" << op.dex_field->get_name()->str() << " = " << a.rhs(); break; } case PTS_IPUT_SPECIAL: { o << pts_impl::special_edge_to_string(op.special_edge) << "(" << a.lhs() << ") = " << a.rhs(); break; } case PTS_SPUT: { o << op.dex_field->get_class()->get_name()->str() << "#" << op.dex_field->get_name()->str() << " = " << a.rhs(); break; } case PTS_INVOKE_VIRTUAL: case PTS_INVOKE_SUPER: case PTS_INVOKE_DIRECT: case PTS_INVOKE_INTERFACE: case PTS_INVOKE_STATIC: { if (a.has_dest()) { o << a.dest() << " = "; } if (!op.is_static_call()) { o << a.instance() << ".{"; switch (op.kind) { case PTS_INVOKE_VIRTUAL: { o << "V"; break; } case PTS_INVOKE_SUPER: { o << "S"; break; } case PTS_INVOKE_DIRECT: { o << "D"; break; } case PTS_INVOKE_INTERFACE: { o << "I"; break; } default: { not_reached(); } } o << "}"; } o << op.dex_method->get_class()->get_name()->str() << "#" << op.dex_method->get_name()->str() << "("; auto args = a.get_arguments(); for (auto it = args.begin(); it != args.end(); ++it) { o << it->first << " => " << it->second; if (std::next(it) != args.end()) { o << ", "; } } o << ")"; break; } case PTS_RETURN: { o << "RETURN " << a.src(); break; } case PTS_DISJUNCTION: { o << a.dest() << " = "; auto args = a.get_arguments(); for (auto it = args.begin(); it != args.end(); ++it) { o << it->second; if (std::next(it) != args.end()) { o << " U "; } } break; } } return o; } namespace pts_impl { /* * In Andersen's approach to points-to analysis, a program is translated into a * system of set constraints by modeling pointers as sets. This model is * flow-insensitive, i.e., control-flow dependencies among statements are * abstracted away. While this makes the analysis more tractable, applying this * approach naively can lead to major precision problems. For example, consider * the following piece of Java code: * * x = new A(); * x.field1 = new B(); * x.field2 = new C(); * * The corresponding Dex code may look like: * * new-instance v0, LA; * invoke-direct {v0}, LA;.<init>:()V * new-instance v1, LB; * invoke-direct {v1}, LB;.<init>:()V * iput-object v1, v0, LA;.field1:LB; * new-instance v1, LC; * invoke-direct {v1}, LC;.<init>:()V * iput-object v1, v0, LA;.field2:LC; * * Applying Andersen's approach directly, we would assign a set variable to each * register in the code, thus obtaining the following system of set constraints: * * V0 = NEW LA; * V0.{D}LA;#<init>() * V1 = NEW LB; * V1.{D}LB;#<init>() * V1 = NEW LC; * V1.{D}LB;#<init>() * V0.LA;#field1 = V1 * V0.LA;#field2 = V1 * * This is obviously very bad, as the points-to sets of field1 and field2 are * equated. The problem here is that register v1 is reused in two different * contexts and that information is lost by the flow-insensitive abstraction. In * order to avoid this pitfall, we use the notion of `anchor` introduced in the * following paper: * * A. Venet. A Scalable Nonuniform Pointer Analysis for Embedded Programs. * In Proceedings of the International Static Analysis Symposium, 2004. * * Each operation returning a pointer is associated a unique `anchor`. A * preliminary intraprocedural, flow-sensitive analysis assigns a set of anchors * to each register. When the points-to equations are generated, we use the * anchors and not the registers as the basis for creating set variables. Hence, * the points-to equations for the code snippet above would look like: * * V0 = NEW LA; * V0.{D}LA;#<init>() * V1 = NEW LB; * V1.{D}LB;#<init>() * V2 = NEW LC; * V2.{D}LB;#<init>() * V0.LA;#field1 = V1 * V0.LA;#field2 = V2 * * The points-to sets of field1 and field2 are thus kept separate. We get the * same level of precision as if the code were in SSA form, without incurring * the cost of managing the SSA representation. */ using namespace std::placeholders; using namespace ir_analyzer; // We represent an anchor by a pointer to the corresponding instruction. An // empty anchor set is semantically equivalent to the `null` reference. using AnchorDomain = PatriciaTreeSetAbstractDomain<const IRInstruction*>; using AnchorEnvironment = PatriciaTreeMapAbstractEnvironment<reg_t, AnchorDomain>; class AnchorPropagation final : public BaseIRAnalyzer<AnchorEnvironment> { public: AnchorPropagation(const cfg::ControlFlowGraph& cfg, bool is_static_method, IRCode* code) : BaseIRAnalyzer(cfg), m_is_static_method(is_static_method), m_code(code), m_this_anchor(nullptr) {} void analyze_instruction(const IRInstruction* insn, AnchorEnvironment* current_state) const override { switch (insn->opcode()) { case IOPCODE_LOAD_PARAM_OBJECT: { // There's nothing to do, since this instruction has been taken care of // during the initialization of the analysis. break; } case OPCODE_MOVE_EXCEPTION: { current_state->set(insn->dest(), AnchorDomain(insn)); break; } case OPCODE_CONST_STRING: case OPCODE_CONST_CLASS: case OPCODE_CHECK_CAST: case OPCODE_NEW_INSTANCE: case OPCODE_NEW_ARRAY: case OPCODE_AGET_OBJECT: case OPCODE_IGET_OBJECT: case OPCODE_SGET_OBJECT: case OPCODE_FILLED_NEW_ARRAY: { current_state->set(RESULT_REGISTER, AnchorDomain(insn)); break; } case OPCODE_MOVE_OBJECT: { current_state->set(insn->dest(), current_state->get(insn->src(0))); break; } case IOPCODE_MOVE_RESULT_PSEUDO_OBJECT: case OPCODE_MOVE_RESULT_OBJECT: { current_state->set(insn->dest(), current_state->get(RESULT_REGISTER)); break; } case OPCODE_INVOKE_STATIC: case OPCODE_INVOKE_VIRTUAL: case OPCODE_INVOKE_SUPER: case OPCODE_INVOKE_DIRECT: case OPCODE_INVOKE_INTERFACE: { DexMethodRef* dex_method = insn->get_method(); if (type::is_object(dex_method->get_proto()->get_rtype())) { // We attach an anchor to a method invocation only if the method returns // an object. current_state->set(RESULT_REGISTER, AnchorDomain(insn)); } break; } default: { // Since registers can be reused in different contexts, we need to // invalidate the corresponding anchor sets. Note that this case also // encompasses the initialization to null, like `const v1, 0`. if (insn->has_dest()) { current_state->set(insn->dest(), AnchorDomain()); if (insn->dest_is_wide()) { current_state->set(insn->dest() + 1, AnchorDomain()); } } // There is no need to invalidate RESULT_REGISTER, because all operations // that may write a reference into RESULT_REGISTER are handled in the // switch statement. } } } void run() { MonotonicFixpointIterator::run(initial_environment()); } bool is_this_anchor(const IRInstruction* insn) const { return insn == m_this_anchor; } private: // We initialize all registers to the empty anchor set, i.e. the semantic // equivalent of `null` in our analysis. However, the parameters of the method // are initialized to the anchors of the corresponding LOAD_PARAM // instructions. AnchorEnvironment initial_environment() { AnchorEnvironment env; // We first initialize all registers to `null`. env.set(RESULT_REGISTER, AnchorDomain()); for (size_t reg = 0; reg < m_code->get_registers_size(); ++reg) { env.set(reg, AnchorDomain()); } // We then initialize the parameters of the method. bool first_param = true; for (const MethodItemEntry& mie : InstructionIterable(m_code->get_param_instructions())) { IRInstruction* insn = mie.insn; if (first_param && !m_is_static_method) { always_assert_log(insn->opcode() == IOPCODE_LOAD_PARAM_OBJECT, "Unexpected instruction '%s' in virtual method\n", SHOW(insn)); m_this_anchor = insn; } switch (insn->opcode()) { case IOPCODE_LOAD_PARAM_OBJECT: { env.set(insn->dest(), AnchorDomain(insn)); break; } case IOPCODE_LOAD_PARAM: case IOPCODE_LOAD_PARAM_WIDE: { break; } default: { not_reached_log("Unexpected instruction '%s'\n", SHOW(insn)); } } first_param = false; } return env; } bool m_is_static_method; IRCode* m_code; IRInstruction* m_this_anchor; }; // Generates the points-to actions for a single method. class PointsToActionGenerator final { public: explicit PointsToActionGenerator(DexMethod* dex_method, PointsToMethodSemantics* semantics, const TypeSystem& type_system, const PointsToSemanticsUtils& utils) : m_dex_method(dex_method), m_semantics(semantics), m_type_system(type_system), m_utils(utils) {} void run() { IRCode* code = m_dex_method->get_code(); always_assert(code != nullptr); code->build_cfg(/* editable */ false); cfg::ControlFlowGraph& cfg = code->cfg(); cfg.calculate_exit_block(); // We first propagate the anchors across the code. m_analysis = std::make_unique<AnchorPropagation>(cfg, is_static(m_dex_method), code); m_analysis->run(); // Then we assign a unique variable to each anchor. name_anchors(cfg); // The LOAD_PARAM_* instructions sit next to each other at the beginning of // the entry block. We need to process them first. size_t param_cursor = 0; bool first_param = true; for (const MethodItemEntry& mie : InstructionIterable(cfg.get_param_instructions())) { IRInstruction* insn = mie.insn; switch (insn->opcode()) { case IOPCODE_LOAD_PARAM_OBJECT: { if (first_param && !is_static(m_dex_method)) { // If the method is not static, the first parameter corresponds to // `this`, which is represented using a special points-to variable. } else { m_semantics->add(PointsToAction::load_operation( PointsToOperation(PTS_LOAD_PARAM, param_cursor++), get_variable_from_anchor(insn))); } break; } case IOPCODE_LOAD_PARAM: case IOPCODE_LOAD_PARAM_WIDE: { ++param_cursor; break; } default: not_reached(); } first_param = false; } // We go over each IR instruction and generate the corresponding points-to // actions. for (cfg::Block* block : cfg.blocks()) { AnchorEnvironment state = m_analysis->get_entry_state_at(block); for (const MethodItemEntry& mie : InstructionIterable(*block)) { IRInstruction* insn = mie.insn; generate_action(insn, state); m_analysis->analyze_instruction(insn, &state); } } m_semantics->shrink(); } private: // We associate each anchor with a unique points-to variable. void name_anchors(const cfg::ControlFlowGraph& cfg) { for (cfg::Block* block : cfg.blocks()) { for (const MethodItemEntry& mie : *block) { if (mie.type == MFLOW_OPCODE) { IRInstruction* insn = mie.insn; if (is_anchored_instruction(insn)) { m_anchors.insert({insn, m_semantics->get_new_variable()}); } } } } } // Each IR instruction that returns a result of reference type is assigned an // anchor. bool is_anchored_instruction(IRInstruction* insn) const { switch (insn->opcode()) { case IOPCODE_LOAD_PARAM_OBJECT: case OPCODE_MOVE_EXCEPTION: case OPCODE_CONST_STRING: case OPCODE_CONST_CLASS: case OPCODE_CHECK_CAST: case OPCODE_NEW_INSTANCE: case OPCODE_NEW_ARRAY: case OPCODE_AGET_OBJECT: case OPCODE_IGET_OBJECT: case OPCODE_SGET_OBJECT: case OPCODE_FILLED_NEW_ARRAY: { return true; } case OPCODE_INVOKE_STATIC: case OPCODE_INVOKE_VIRTUAL: case OPCODE_INVOKE_SUPER: case OPCODE_INVOKE_DIRECT: case OPCODE_INVOKE_INTERFACE: { return type::is_object(insn->get_method()->get_proto()->get_rtype()); } default: { return false; } } } void generate_action(IRInstruction* insn, const AnchorEnvironment& state) { switch (insn->opcode()) { case OPCODE_MOVE_EXCEPTION: { m_semantics->add( PointsToAction::load_operation(PointsToOperation(PTS_GET_EXCEPTION), get_variable_from_anchor(insn))); break; } case OPCODE_RETURN_OBJECT: { m_semantics->add(PointsToAction::return_operation( get_variable_from_anchor_set(state.get(insn->src(0))))); break; } case OPCODE_CONST_STRING: { m_semantics->add(PointsToAction::load_operation( PointsToOperation(PTS_CONST_STRING, insn->get_string()), get_variable_from_anchor(insn))); break; } case OPCODE_CONST_CLASS: { m_semantics->add(PointsToAction::load_operation( PointsToOperation(PTS_CONST_CLASS, insn->get_type()), get_variable_from_anchor(insn))); break; } case OPCODE_CHECK_CAST: { m_semantics->add(PointsToAction::check_cast_operation( insn->get_type(), get_variable_from_anchor(insn), get_variable_from_anchor_set(state.get(insn->src(0))))); break; } case OPCODE_NEW_INSTANCE: { DexType* dex_type = insn->get_type(); if (m_type_system.is_subtype(type::java_lang_Throwable(), dex_type)) { // If the object created is an exception (i.e., its type inherits from // java.lang.Throwable), we use PTS_GET_EXCEPTION. In our semantic // model, the exact identity of an exception is abstracted away for // simplicity. The operation PTS_GET_EXCEPTION can be interpreted as a // nondeterministic choice among all abstract object instances that are // exceptions. m_semantics->add( PointsToAction::load_operation(PointsToOperation(PTS_GET_EXCEPTION), get_variable_from_anchor(insn))); break; } // Otherwise, we fall through to the generic case. } FALLTHROUGH_INTENDED; case OPCODE_NEW_ARRAY: case OPCODE_FILLED_NEW_ARRAY: { m_semantics->add(PointsToAction::load_operation( PointsToOperation(PTS_NEW_OBJECT, insn->get_type()), get_variable_from_anchor(insn))); if (insn->opcode() == OPCODE_FILLED_NEW_ARRAY) { const DexType* element_type = type::get_array_element_type(insn->get_type()); if (!type::is_object(element_type)) { break; } auto lhs = boost::optional<PointsToVariable>(get_variable_from_anchor(insn)); for (size_t i = 0; i < insn->srcs_size(); ++i) { m_semantics->add(PointsToAction::put_operation( PointsToOperation(PTS_IPUT_SPECIAL, PTS_ARRAY_ELEMENT), get_variable_from_anchor_set(state.get(insn->src(i))), lhs)); } } break; } case OPCODE_APUT_OBJECT: { PointsToVariable rhs = get_variable_from_anchor_set(state.get(insn->src(0))); PointsToVariable lhs = get_variable_from_anchor_set(state.get(insn->src(1))); m_semantics->add(PointsToAction::put_operation( PointsToOperation(PTS_IPUT_SPECIAL, PTS_ARRAY_ELEMENT), rhs, boost::optional<PointsToVariable>(lhs))); break; } case OPCODE_IPUT_OBJECT: { PointsToVariable rhs = get_variable_from_anchor_set(state.get(insn->src(0))); PointsToVariable lhs = get_variable_from_anchor_set(state.get(insn->src(1))); m_semantics->add(PointsToAction::put_operation( PointsToOperation(PTS_IPUT, insn->get_field()), rhs, boost::optional<PointsToVariable>(lhs))); break; } case OPCODE_SPUT_OBJECT: { m_semantics->add(PointsToAction::put_operation( PointsToOperation(PTS_SPUT, insn->get_field()), get_variable_from_anchor_set(state.get(insn->src(0))))); break; } case OPCODE_AGET_OBJECT: { PointsToVariable instance = get_variable_from_anchor_set(state.get(insn->src(0))); m_semantics->add(PointsToAction::get_operation( PointsToOperation(PTS_IGET_SPECIAL, PTS_ARRAY_ELEMENT), get_variable_from_anchor(insn), boost::optional<PointsToVariable>(instance))); break; } case OPCODE_IGET_OBJECT: { PointsToVariable instance = get_variable_from_anchor_set(state.get(insn->src(0))); m_semantics->add(PointsToAction::get_operation( PointsToOperation(PTS_IGET, insn->get_field()), get_variable_from_anchor(insn), boost::optional<PointsToVariable>(instance))); break; } case OPCODE_SGET_OBJECT: { // One way to get the java.lang.Class object of a primitive type is by // querying the `TYPE` field of the corresponding wrapper class. We // translate those kind of sget-object instructions into equivalent // PTS_CONST_CLASS operations. if (m_utils.is_primitive_type_class_object_retrieval(insn)) { m_semantics->add(PointsToAction::load_operation( PointsToOperation(PTS_CONST_CLASS, insn->get_field()->get_class()), get_variable_from_anchor(insn))); break; } m_semantics->add(PointsToAction::get_operation( PointsToOperation(PTS_SGET, insn->get_field()), get_variable_from_anchor(insn))); break; } case OPCODE_INVOKE_STATIC: case OPCODE_INVOKE_VIRTUAL: case OPCODE_INVOKE_SUPER: case OPCODE_INVOKE_DIRECT: case OPCODE_INVOKE_INTERFACE: { translate_invoke(insn, state); break; } default: { // All other instructions are either transparent to points-to analysis or // have already been taken care of (LOAD_PARAM_*). } } } // This is where we can provide the semantics of external API calls that are // relevant to points-to analysis and for which the source code is either // unavailable or hard to process automatically (e.g., native methods). void translate_invoke(IRInstruction* insn, const AnchorEnvironment& state) { // Calls to java.lang.Object#getClass() are converted to a points-to // operation in order to simplify the analysis. if (m_utils.is_get_class_invocation(insn)) { m_semantics->add(PointsToAction::get_class_operation( get_variable_from_anchor(insn), get_variable_from_anchor_set(state.get(insn->src(0))))); return; } // Otherwise, we default to the general translation of method calls. default_invoke_translation(insn, state); } void default_invoke_translation(IRInstruction* insn, const AnchorEnvironment& state) { DexMethodRef* dex_method = insn->get_method(); DexProto* proto = dex_method->get_proto(); const auto* signature = proto->get_args(); std::vector<std::pair<int32_t, PointsToVariable>> args; size_t src_idx{0}; // Allocate a variable for the returned object if any. boost::optional<PointsToVariable> dest; if (type::is_object(insn->get_method()->get_proto()->get_rtype())) { dest = {get_variable_from_anchor(insn)}; } // Allocate a variable for the instance object if any. boost::optional<PointsToVariable> instance; if (insn->opcode() != OPCODE_INVOKE_STATIC) { // The first argument is a reference to the object instance on which the // method is invoked. instance = { get_variable_from_anchor_set(state.get(insn->src(src_idx++)))}; } // Process the arguments of the method invocation. int32_t arg_pos = 0; for (DexType* dex_type : *signature) { if (type::is_object(dex_type)) { args.push_back({arg_pos, get_variable_from_anchor_set( state.get(insn->src(src_idx++)))}); } else { // We skip this argument. ++src_idx; } ++arg_pos; } // Select the right points-to operation. PointsToOperationKind invoke_kind; switch (insn->opcode()) { case OPCODE_INVOKE_STATIC: { invoke_kind = PTS_INVOKE_STATIC; break; } case OPCODE_INVOKE_VIRTUAL: { invoke_kind = PTS_INVOKE_VIRTUAL; break; } case OPCODE_INVOKE_SUPER: { invoke_kind = PTS_INVOKE_SUPER; break; } case OPCODE_INVOKE_DIRECT: { invoke_kind = PTS_INVOKE_DIRECT; break; } case OPCODE_INVOKE_INTERFACE: { invoke_kind = PTS_INVOKE_INTERFACE; break; } default: { // This function is only called on invoke instructions. not_reached(); } } m_semantics->add(PointsToAction::invoke_operation( PointsToOperation(invoke_kind, insn->get_method()), dest, instance, args)); } PointsToVariable get_variable_from_anchor(const IRInstruction* insn) { if (m_analysis->is_this_anchor(insn)) { return PointsToVariable::this_variable(); } auto it = m_anchors.find(insn); always_assert(it != m_anchors.end()); return it->second; } // If the anchor set is not a singleton, we need to introduce a disjunction // operation. PointsToVariable get_variable_from_anchor_set(const AnchorDomain& s) { // By design, the analysis can't generate the Top value. always_assert(!s.is_top()); if (s.is_bottom()) { // This means that some code in the method is unreachable. TRACE(PTA, 2, "Unreachable code in %s", SHOW(m_dex_method)); return PointsToVariable(); } auto anchors = s.elements(); if (anchors.empty()) { // The denotation of the anchor set is just the `null` reference. This is // represented by a special points-to variable. return PointsToVariable::null_variable(); } if (anchors.size() == 1) { // When the anchor set is a singleton, there is no need to introduce a // disjunction. return get_variable_from_anchor(*anchors.begin()); } // Otherwise, we need a disjunction. PointsToVariableSet ptv_set; for (const IRInstruction* insn : anchors) { ptv_set.insert(get_variable_from_anchor(insn)); } auto it = m_anchor_sets.find(ptv_set); if (it != m_anchor_sets.end()) { // The disjunction has already been generated. return it->second; } // Otherwise, we create a new disjunction. PointsToVariable new_v = m_semantics->get_new_variable(); m_anchor_sets.emplace(ptv_set, new_v); // We insert the newly created disjunction before its first use. m_semantics->add( PointsToAction::disjunction(new_v, ptv_set.begin(), ptv_set.end())); return new_v; } DexMethod* m_dex_method; PointsToMethodSemantics* m_semantics; const TypeSystem& m_type_system; const PointsToSemanticsUtils& m_utils; std::unique_ptr<AnchorPropagation> m_analysis; // We assign each anchor a points-to variable. This map keeps track of the // naming. std::unordered_map<const IRInstruction*, PointsToVariable> m_anchors; // A table that keeps track of all disjunctions already created, so that we // only generate one disjuction per anchor set. std::unordered_map<PointsToVariableSet, PointsToVariable, PointsToVariableSet::Hash, PointsToVariableSet::EqualTo> m_anchor_sets; }; // Removes points-to equations that have no effect on the computation of the // points-to analysis. We compute the dependency graph of the points-to // equations and we discard all variables that are not involved in any relevant // computation. class Shrinker final { public: explicit Shrinker(std::vector<PointsToAction>* pt_actions) : m_pt_actions(pt_actions) {} void run() { // We first identify all the variables that we surely need to keep in order // to perform the points-to analysis. find_root_vars(); // We then compute the dependency graph: there is an edge v -> w between // points-to variables v and w iff the value of w is needed to compute the // value of v. build_dependency_graph(); // We compute the set of variables that are reachable from any one of the // root variables in the dependency graph. collect_reachable_vars(); // We remove any points-to equation assigning a value to a variable that // hasn't been marked as reachable in the previous step. shrink_points_to_actions(); } private: using VariableSet = std::unordered_set<PointsToVariable, boost::hash<PointsToVariable>>; // We keep all `put`, `invoke` and `return` operations, since they presumably // have an effect on the analysis. void find_root_vars() { for (const PointsToAction& pt_action : *m_pt_actions) { const PointsToOperation& op = pt_action.operation(); if (op.is_put()) { if (!op.is_sput()) { m_root_vars.insert(pt_action.lhs()); } m_root_vars.insert(pt_action.rhs()); continue; } if (op.is_invoke()) { if (op.is_virtual_call()) { m_root_vars.insert(pt_action.instance()); } for (const auto& arg : pt_action.get_arguments()) { m_root_vars.insert(arg.second); } continue; } if (op.is_return()) { m_root_vars.insert(pt_action.src()); continue; } } } // When building the dependency graph, we are only interested in operations // that assign a value to a variable but don't create any points-to relation // between objects (unlike a `put` operation, for example). We don't consider // `load` operations, because they can't create dependencies among variables. // An edge v -> w in the dependency graph means that computing the value of // variable v requires the value of variable w. void build_dependency_graph() { for (const PointsToAction& pt_action : *m_pt_actions) { const PointsToOperation& op = pt_action.operation(); if (op.is_get_class() || op.is_check_cast()) { add_dependency(pt_action.dest(), pt_action.src()); continue; } if (op.is_get() && !op.is_sget()) { add_dependency(pt_action.dest(), pt_action.instance()); continue; } if (op.is_disjunction()) { for (const auto& arg : pt_action.get_arguments()) { add_dependency(pt_action.dest(), arg.second); } continue; } } } void add_dependency(const PointsToVariable& x, const PointsToVariable& y) { m_dependency_graph[x].insert(y); } // If there exists a path from any root variable to a variable v, this means // that the value of variable v is required for performing the points-to // analysis. All other variables can safely be discarded. We compute the set // of reachable variables using a simple breadth-first traversal of the graph. void collect_reachable_vars() { std::deque<PointsToVariable> queue(m_root_vars.begin(), m_root_vars.end()); while (!queue.empty()) { PointsToVariable v = queue.back(); queue.pop_back(); // Note that the variables already visited are exactly the variables that // we need to keep. if (m_vars_to_keep.count(v) != 0) { continue; } m_vars_to_keep.insert(v); const auto& deps = m_dependency_graph[v]; queue.insert(queue.begin(), deps.begin(), deps.end()); } } void shrink_points_to_actions() { // Any `load`, `check_cast`, `get` or `disjunction` operation assigning a // value to a variable that hasn't been marked to keep can safely be // discarded. m_pt_actions->erase( std::remove_if(m_pt_actions->begin(), m_pt_actions->end(), [this](const PointsToAction& pt_action) { const PointsToOperation& op = pt_action.operation(); return (op.is_load() || op.is_check_cast() || op.is_get() || op.is_disjunction()) && (m_vars_to_keep.count(pt_action.dest()) == 0); }), m_pt_actions->end()); m_pt_actions->shrink_to_fit(); // We can also safely remove the `dest` variable of a method call if it // hasn't been marked to keep. Computing the return value of a virtual call // during the analysis may entail performing the join of multiple points-to // sets, which is costly. Hence, removing unneeded return values is a // valuable optimization. for (PointsToAction& pt_action : *m_pt_actions) { if (pt_action.operation().is_invoke() && pt_action.has_dest() && (m_vars_to_keep.count(pt_action.dest()) == 0)) { pt_action.remove_dest(); } } } std::vector<PointsToAction>* m_pt_actions; std::unordered_map<PointsToVariable, VariableSet, boost::hash<PointsToVariable>> m_dependency_graph; VariableSet m_root_vars; VariableSet m_vars_to_keep; }; /* clang-format off */ #define KIND_STRING_TABLE \ { \ KIND_STRING(PTS_APK), \ KIND_STRING(PTS_ABSTRACT), \ KIND_STRING(PTS_NATIVE), \ KIND_STRING(PTS_STUB), \ } \ #define KIND_STRING(X) {X, #X} std::unordered_map<MethodKind, std::string, std::hash<int>> method_kind_to_string_table = KIND_STRING_TABLE; #undef KIND_STRING #define KIND_STRING(X) {#X, X} std::unordered_map<std::string, MethodKind> string_to_method_kind_table = KIND_STRING_TABLE; #undef KIND_STRING /* clang-format on */ s_expr method_kind_to_s_expr(MethodKind kind) { auto it = method_kind_to_string_table.find(kind); always_assert(it != method_kind_to_string_table.end()); return s_expr(it->second); } boost::optional<MethodKind> string_to_method_kind(const std::string& str) { auto it = string_to_method_kind_table.find(str); if (it == string_to_method_kind_table.end()) { return {}; } return boost::optional<MethodKind>(it->second); } } // namespace pts_impl PointsToMethodSemantics::PointsToMethodSemantics(DexMethodRef* dex_method, MethodKind kind, size_t start_var_id, size_t size_hint) : m_dex_method(dex_method), m_kind(kind), m_variable_counter(start_var_id) { m_points_to_actions.reserve(size_hint); } void PointsToMethodSemantics::shrink() { pts_impl::Shrinker shrinker(&m_points_to_actions); shrinker.run(); } s_expr PointsToMethodSemantics::to_s_expr() const { using namespace pts_impl; std::vector<s_expr> actions; std::transform(m_points_to_actions.begin(), m_points_to_actions.end(), std::back_inserter(actions), [](const PointsToAction& a) { return a.to_s_expr(); }); return s_expr( {dex_method_to_s_expr(m_dex_method), method_kind_to_s_expr(m_kind), s_expr(static_cast<int32_t>(m_variable_counter)), s_expr(actions)}); } boost::optional<PointsToMethodSemantics> PointsToMethodSemantics::from_s_expr( const s_expr& e) { using namespace pts_impl; s_expr dex_method_expr; std::string kind_str; int32_t var_counter; s_expr actions_expr; if (!s_patn({s_patn(dex_method_expr), s_patn(&kind_str), s_patn(&var_counter), s_patn({}, actions_expr)}) .match_with(e)) { return {}; } auto dex_method_op = s_expr_to_dex_method(dex_method_expr); if (!dex_method_op) { return {}; } auto kind_opt = string_to_method_kind(kind_str); if (!kind_opt) { return {}; } PointsToMethodSemantics semantics( *dex_method_op, *kind_opt, var_counter, actions_expr.size()); for (size_t i = 0; i < actions_expr.size(); ++i) { auto action_opt = PointsToAction::from_s_expr(actions_expr[i]); if (!action_opt) { return {}; } semantics.add(*action_opt); } return boost::optional<PointsToMethodSemantics>(semantics); } std::ostream& operator<<(std::ostream& o, const PointsToMethodSemantics& s) { o << s.m_dex_method->get_class()->get_name()->str() << "#" << s.m_dex_method->get_name()->str() << ": " << SHOW(s.m_dex_method->get_proto()) << " "; switch (s.kind()) { case PTS_ABSTRACT: { o << "= ABSTRACT"; break; } case PTS_NATIVE: { o << "= NATIVE"; break; } case PTS_APK: case PTS_STUB: { o << "{" << std::endl; for (const auto& a : s.get_points_to_actions()) { o << " " << a << std::endl; } o << "}"; break; } } o << std::endl; return o; } PointsToSemantics::PointsToSemantics(const Scope& scope, bool generate_stubs) : m_generate_stubs(generate_stubs), m_type_system(scope) { // We size the hash table so as to fit all the methods in scope. size_t method_count = 0; for (DexClass* dex_class : scope) { method_count += dex_class->get_dmethods().size() + dex_class->get_vmethods().size(); } m_method_semantics.reserve(method_count); // We initialize all entries in the hash table, which can then be concurrently // accessed by the workers using the thread-safe find() operation of // std::unordered_map. for (DexClass* dex_class : scope) { for (DexMethod* dmethod : dex_class->get_dmethods()) { initialize_entry(dmethod); } for (DexMethod* vmethod : dex_class->get_vmethods()) { initialize_entry(vmethod); } } // We generate a system of points-to actions for each Dex method in parallel. walk::parallel::methods(scope, [this](DexMethod* dex_method) { generate_points_to_actions(dex_method); }); } void PointsToSemantics::load_stubs(const std::string& file_name) { std::ifstream file_input(file_name); s_expr_istream s_expr_input(file_input); while (s_expr_input.good()) { s_expr expr; s_expr_input >> expr; if (s_expr_input.eoi()) { break; } always_assert_log( !s_expr_input.fail(), "%s\n", s_expr_input.what().c_str()); auto semantics_opt = PointsToMethodSemantics::from_s_expr(expr); always_assert_log( semantics_opt, "Couldn't parse S-expression: %s\n", expr.str().c_str()); DexMethodRef* dex_method = semantics_opt->get_method(); auto it = m_method_semantics.find(dex_method); if (it == m_method_semantics.end()) { m_method_semantics.emplace(dex_method, *semantics_opt); } else { TRACE(PTA, 2, "Collision with stub for method %s", SHOW(dex_method)); } } } boost::optional<PointsToMethodSemantics*> PointsToSemantics::get_method_semantics(DexMethodRef* dex_method) { auto entry = m_method_semantics.find(dex_method); if (entry == m_method_semantics.end()) { return {}; } return boost::optional<PointsToMethodSemantics*>(&entry->second); } MethodKind PointsToSemantics::default_method_kind() const { return m_generate_stubs ? PTS_STUB : PTS_APK; } void PointsToSemantics::initialize_entry(DexMethod* dex_method) { DexAccessFlags access_flags = dex_method->get_access(); MethodKind kind; if (dex_method->get_code() == nullptr) { if ((access_flags & DexAccessFlags::ACC_ABSTRACT)) { kind = PTS_ABSTRACT; } else { // The definition of a method that is neither abstract nor native should // always have an associated IRCode component. redex_assert(access_flags & DexAccessFlags::ACC_NATIVE); kind = PTS_NATIVE; } } else { kind = default_method_kind(); } m_method_semantics.emplace(std::piecewise_construct, std::forward_as_tuple(dex_method), std::forward_as_tuple(/* dex_method */ dex_method, /* kind */ kind, /* start_var_id */ 0, /* size_hint */ 8)); } void PointsToSemantics::generate_points_to_actions(DexMethod* dex_method) { // According to section [container.requirements.dataraces] of the C++ standard // definition document, the find() method of std::unordered_map is // thread-safe. Since this function operates on a single Dex method and the // hash table m_method_semantics is indexed by Dex methods, the following code // is not subject to data races. auto entry = m_method_semantics.find(dex_method); // All hash table entries have been initialized in the constructor. always_assert(entry != m_method_semantics.end()); PointsToMethodSemantics* semantics = &entry->second; if (semantics->kind() == default_method_kind()) { pts_impl::PointsToActionGenerator generator( dex_method, semantics, m_type_system, m_utils); generator.run(); } } std::ostream& operator<<(std::ostream& o, const PointsToSemantics& s) { for (const auto& entry : s.m_method_semantics) { o << entry.second; } return o; }