libredex/DexUtil.h (242 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include <algorithm> #include <boost/algorithm/string/predicate.hpp> #include <functional> #include <string_view> #include <unordered_set> #include <vector> #include "ClassUtil.h" #include "Debug.h" #include "DexClass.h" #include "DexStore.h" #include "IRInstruction.h" #include "MethodUtil.h" #include "TypeUtil.h" /** * Given an instruction, determine which class' would get initialized, if any. */ const DexType* get_init_class_type_demand(const IRInstruction* insn); /** * Data structure to represent requested but unapplied visibility changes. */ struct VisibilityChanges { std::unordered_set<DexClass*> classes; std::unordered_set<DexField*> fields; std::unordered_set<DexMethod*> methods; void insert(const VisibilityChanges& other); void apply() const; bool empty() const; }; /** * Change the visibility of members accessed in a method. * We make everything public, except if a scope argument is given; then accessed * members in the same scope will not be made public (We could be more precise * and walk the inheritance hierarchy as needed.) */ VisibilityChanges get_visibility_changes(const DexMethod* method, DexType* scope = nullptr); inline void change_visibility(const DexMethod* method, DexType* scope = nullptr) { get_visibility_changes(method, scope).apply(); } // The given code can be in cfg form. VisibilityChanges get_visibility_changes( const IRCode* code, DexType* scope, const DexMethod* effective_caller_resolved_from); inline void change_visibility(const IRCode* code, DexType* scope, const DexMethod* effective_caller_resolved_from) { get_visibility_changes(code, scope, effective_caller_resolved_from).apply(); } VisibilityChanges get_visibility_changes( const cfg::ControlFlowGraph& cfg, DexType* scope, const DexMethod* effective_caller_resolved_from); /** * NOTE: Only relocates the method. Doesn't check the correctness here, * nor does it make sure that the members are accessible from the * new type. */ void relocate_method(DexMethod* method, DexType* to_type); void relocate_field(DexField* field, DexType* to_type); /** * Checks if a method can be relocated, i.e. if it doesn't require any changes * to invoked direct methods (none of the invoked direct methods would need to * change into a public virtual / static method) or framework protected methods. * Any problematic invoked methods are added to the optionally supplied set. */ bool gather_invoked_methods_that_prevent_relocation( const DexMethod* method, std::unordered_set<DexMethodRef*>* methods_preventing_relocation = nullptr); /** * Relocates the method only if * gather_invoked_methods_that_prevent_relocation returns true. * It also updates the visibility of the accessed members. * NOTE: Does not check if get_visibility_changes(...) is empty. * TODO: Consider integrating the full visibility check in * gather_invoked_methods_that_prevent_relocation. */ bool relocate_method_if_no_changes(DexMethod* method, DexType* to_type); /** * Merge the 2 visibility access flags. Return the most permissive visibility. */ DexAccessFlags merge_visibility(uint32_t vis1, uint32_t vis2); /** * Sorts and unique-ifies the given vector. */ template <class T, class Cmp = std::less<T>> void sort_unique(std::vector<T>& vec, Cmp cmp = std::less<T>()) { std::sort(vec.begin(), vec.end(), cmp); auto last = std::unique(vec.begin(), vec.end()); vec.erase(last, vec.end()); } /** * True if this instruction is passing through all the args of its enclosing * method. This predicate simplifies inlining optimizations since otherwise * the optimization would have to re-map the input regs. The N arguments to * the invoke should be the last N registers of the frame. */ bool passes_args_through(IRInstruction* insn, const IRCode& code, int ignore = 0); /** * Creates a runtime exception block of instructions. This is primarily used * by transformations for substituting instructions which throw an exception * at runtime. Currently, used for substituting switch case instructions. */ void create_runtime_exception_block(const DexString* except_str, std::vector<IRInstruction*>& block); /** * Generates a Scope& object from a set of Dexes. * */ template <class T> Scope build_class_scope(const T& dexen) { Scope v; for (auto const& classes : dexen) { for (auto clazz : classes) { v.push_back(clazz); } } return v; }; Scope build_class_scope(const DexStoresVector& stores); Scope build_class_scope_for_packages( const DexStoresVector& stores, const std::unordered_set<std::string>& package_names); /** * Posts the changes made to the Scope& object to the * Dexes. * */ template <class T> void post_dexen_changes(const Scope& v, T& dexen) { std::unordered_set<DexClass*> clookup(v.begin(), v.end()); for (auto& classes : dexen) { classes.erase( std::remove_if(classes.begin(), classes.end(), [&](DexClass* cls) { return !clookup.count(cls); }), classes.end()); } if (debug) { std::unordered_set<DexClass*> dlookup; for (auto const& classes : dexen) { for (auto const& cls : classes) { dlookup.insert(cls); } } for (auto const& cls : clookup) { assert_log(dlookup.count(cls), "Can't add classes in post_dexen_changes"); } } }; void post_dexen_changes(const Scope& v, DexStoresVector& stores); void load_root_dexen(DexStore& store, const std::string& dexen_dir_str, bool balloon = false, bool throw_on_balloon_error = true, bool verbose = true, int support_dex_version = 35); /** * Creates a generated store based on the given classes. * * NOTE: InterDex will take care of adding the classes to the root store. * TODO: Add a way to define a real store. */ void create_store(const std::string& store_name, DexStoresVector& stores, DexClasses classes); /** * Determine if the given dex item has the given annotation * * @param t The dex item whose annotations we'll examine * @param anno_type The annotation we're looking for, expressed as DexType * @return true IFF dex item t is annotated with anno_type */ template <typename T> bool has_anno(const T* t, const DexType* anno_type) { if (anno_type == nullptr) return false; if (t->get_anno_set() == nullptr) return false; for (const auto& anno : t->get_anno_set()->get_annotations()) { if (anno->type() == anno_type) { return true; } } return false; } template <typename T> bool has_anno(const T* t, const std::unordered_set<DexType*>& anno_types) { if (t->get_anno_set() == nullptr) return false; for (const auto& anno : t->get_anno_set()->get_annotations()) { if (anno_types.count(anno->type())) { return true; } } return false; } // Check whether the given string is a valid identifier. This does // not handle UTF. Checks against the Java bytecode specification, // which is a bit more relaxed than Dex's. bool is_valid_identifier(std::string_view s); namespace java_names { inline boost::optional<std::string> primitive_desc_to_name(char desc) { const static std::unordered_map<char, std::string> conversion_table{ {'V', "void"}, {'B', "byte"}, {'C', "char"}, {'S', "short"}, {'I', "int"}, {'J', "long"}, {'Z', "boolean"}, {'F', "float"}, {'D', "double"}, }; auto it = conversion_table.find(desc); if (it != conversion_table.end()) { return it->second; } else { return boost::none; } } inline boost::optional<char> primitive_name_to_desc(std::string_view name) { const static std::unordered_map<std::string_view, char> conversion_table{ {"void", 'V'}, {"byte", 'B'}, {"char", 'C'}, {"short", 'S'}, {"int", 'I'}, {"long", 'J'}, {"boolean", 'Z'}, {"float", 'F'}, {"double", 'D'}, }; auto it = conversion_table.find(name); if (it != conversion_table.end()) { return it->second; } else { return boost::none; } } // Example: "Ljava/lang/String;" --> "java.lang.String" // Example: "[Ljava/lang/String;" --> "[Ljava.lang.String;" // Example: "I" --> "int" // Example: "[I" --> "[I" inline std::string internal_to_external(std::string_view internal_name) { int array_level = std::count(internal_name.begin(), internal_name.end(), '['); std::string_view component_name = internal_name.substr(array_level); char type = component_name.at(0); if (type == 'L') { // For arrays, we need to preserve the semicolon at the end of the name auto external_name = std::string(component_name.substr( 1, component_name.size() - (array_level == 0 ? 2 : 1))); std::replace(external_name.begin(), external_name.end(), '/', '.'); std::string array_name(array_level, '['); if (array_level != 0) { array_name += "L"; // external only uses 'L' for arrays } return array_name + external_name; } else if (array_level) { // If the type is an array of primitives, the external format is the same // as internal. return std::string(internal_name); } else { auto maybe_external_name = primitive_desc_to_name(type); always_assert_log( maybe_external_name, "%c is not a valid primitive type.", type); return *maybe_external_name; } } // Example: "java.lang.String" --> "Ljava/lang/String;" // Example: "[Ljava.lang.String;" --> "[Ljava/lang/String;" // Example: "int" --> "I" // Example: "[I" --> "[I" // Example: "I" --> "LI;" // Example: "[LI;" --> "[LI;" inline std::string external_to_internal(std::string_view external_name) { // Primitive types (not including their arrays) are special notations auto maybe_primitive_name = primitive_name_to_desc(external_name); if (maybe_primitive_name) { return std::string(1, *maybe_primitive_name); } int array_level = std::count(external_name.begin(), external_name.end(), '['); auto component_external_name = external_name.substr(array_level); /** * Note: "I" is a perfectly valid external name denoting a class of "LI;" * while "int" is the external name for int type. However, "[I" is an array of * int. For an array of "I", you need to use "[LI;" */ if (array_level != 0 && component_external_name.size() == 1) { // It must be an array of primitives. The internal name is the same as the // external name. return std::string(external_name); } std::string component_internal_name(component_external_name); if (array_level == 0) { component_internal_name = "L" + component_internal_name; } std::replace( component_internal_name.begin(), component_internal_name.end(), '.', '/'); if (!boost::algorithm::ends_with(component_internal_name, ";")) { component_internal_name += ";"; } std::string array_prefix; array_prefix.reserve(array_level); for (int i = 0; i < array_level; i++) { array_prefix += '['; } return array_prefix + component_internal_name; } // Example: "Ljava/lang/String;" --> "String" // Example: "[Ljava/lang/String;" --> "String[]" // Example: "I" --> "int" // Example: "[I" --> "int[]" // Example: "LA$B$C;" --> "C" // Example: "[LA$B;" --> "B[]" // Example: "Ljava/lang$1;" --> "" // Note: kotlin anonymous class is not handled properly here. inline std::string internal_to_simple(std::string_view internal_name) { int array_level = std::count(internal_name.begin(), internal_name.end(), '['); auto component_name = internal_name.substr(array_level); std::string component_external_name = internal_to_external(component_name); std::size_t last_dot = component_external_name.rfind('.'); std::size_t last_dollar = component_external_name.rfind('$'); std::string component_simple_name; if (last_dot == std::string::npos && last_dollar == std::string::npos) { component_simple_name = component_external_name; } else if (last_dot == std::string::npos) { component_simple_name = component_external_name.substr(last_dollar + 1); } else if (last_dollar == std::string::npos) { component_simple_name = component_external_name.substr(last_dot + 1); } else { size_t simple_begin = (last_dot < last_dollar) ? last_dollar : last_dot; component_simple_name = component_external_name.substr(simple_begin + 1); } if (std::all_of(component_simple_name.begin(), component_simple_name.end(), isdigit)) { component_simple_name = ""; } // append a pair of [] for each array level. std::string array_suffix; array_suffix.reserve(2 * array_level); for (int i = 0; i < array_level; i++) { array_suffix += "[]"; } return component_simple_name + array_suffix; } inline std::string package_name(std::string_view type_name) { std::string nice_name = internal_to_external(type_name); std::size_t last_dot = nice_name.rfind('.'); if (last_dot != std::string::npos) { return nice_name.substr(0, last_dot); } else { // something went wrong? let's just return the name return nice_name; } } } // namespace java_names