libredex/DexClass.h (950 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include <cstdlib> #include <cstring> #include <functional> #include <initializer_list> #include <limits> #include <memory> #include <optional> #include <string> #include <utility> #include "Debug.h" #include "DexAccess.h" #include "DexDefs.h" #include "DexEncoding.h" #include "DexMemberRefs.h" #include "NoDefaultComparator.h" #include "ReferencedState.h" /* * The structures defined here are literal representations * of what can be represented in a dex. The main purpose of * the translations present here are to decouple the limitations * of "Idx" representation. All of the "Idx"'s are indexes into * arrays of types in the dex format. They are specific to each * dexfile. So, we transform them in a way that we can load * multiple dexes in memory and compare them symbolically. * * In doing so, we enforce the uniqueness requirements of Idx's * within dexes. There's only one DexString* with the same * set of characters. Only one DexType* that has name "Foo;". * That simplifies the process of re-marshalling to dex after * we've completed whatever transforms we are going to do. * * UNIQUENESS: * The private constructor pattern enforces the uniqueness of * the pointer values of each type that has a uniqueness requirement. * * * * Gather methods: * Most `gather_X` methods are templated over the container type. * Currently only `std::vector` and `std::unordered_set` are supported. * The definitions are not in the header so as to avoid overly broad * imports. 
*/ class DexAnnotationDirectory; class DexAnnotationSet; class DexCallSite; class DexClass; class DexDebugInstruction; class DexEncodedValue; class DexEncodedValueArray; class DexField; class DexIdx; class DexInstruction; class DexMethodHandle; class DexOutputIdx; struct DexPosition; class DexProto; class DexString; class DexType; class PositionMapper; // Must be same as in DexAnnotations.h! using ParamAnnotations = std::map<int, std::unique_ptr<DexAnnotationSet>>; constexpr bool kInsertDeobfuscatedNameLinks = false; using Scope = std::vector<DexClass*>; #if defined(__SSE4_2__) && defined(__linux__) && defined(__STRCMP_LESS__) extern "C" bool strcmp_less(const char* str1, const char* str2); #endif class DexString { friend struct RedexContext; std::string m_storage; uint32_t m_utfsize; // See UNIQUENESS above for the rationale for the private constructor pattern. explicit DexString(std::string nstr) : m_storage(std::move(nstr)), m_utfsize(length_of_utf8_string(m_storage.c_str())) {} public: DexString() = delete; DexString(DexString&&) = delete; DexString(const DexString&) = delete; uint32_t size() const { return static_cast<uint32_t>(m_storage.size()); } // UTF-aware length uint32_t length() const { return m_utfsize; } int32_t java_hashcode() const; // DexString retrieval/creation // If the DexString exists, return it, otherwise create it and return it. // See also get_string() static const DexString* make_string(std::string_view nstr); // Return an existing DexString or nullptr if one does not exist. 
static const DexString* get_string(std::string_view s); static const std::string EMPTY; public: bool is_simple() const { return size() == m_utfsize; } const char* c_str() const { return m_storage.c_str(); } const std::string& str() const { return m_storage; } uint32_t get_entry_size() const { uint32_t len = uleb128_encoding_size(m_utfsize); len += size(); len++; // NULL byte return len; } void encode(uint8_t* output) const { output = write_uleb128(output, m_utfsize); strcpy((char*)output, c_str()); } }; /* Non-optimizing DexSpec compliant ordering */ inline bool compare_dexstrings(const DexString* a, const DexString* b) { if (a == nullptr) { return b != nullptr; } else if (b == nullptr) { return false; } if (a->is_simple() && b->is_simple()) #if defined(__SSE4_2__) && defined(__linux__) && defined(__STRCMP_LESS__) return strcmp_less(a->c_str(), b->c_str()); #else return (strcmp(a->c_str(), b->c_str()) < 0); #endif /* * Bother, need to do code-point character-by-character * comparison. */ const char* sa = a->c_str(); const char* sb = b->c_str(); /* Equivalence test first, so we don't worry about walking * off the end. */ if (strcmp(sa, sb) == 0) return false; if (strlen(sa) == 0) { return true; } if (strlen(sb) == 0) { return false; } while (1) { uint32_t cpa = mutf8_next_code_point(sa); uint32_t cpb = mutf8_next_code_point(sb); if (cpa == cpb) { if (*sa == '\0') return true; if (*sb == '\0') return false; continue; } return (cpa < cpb); } } struct dexstrings_comparator { bool operator()(const DexString* a, const DexString* b) const { return compare_dexstrings(a, b); } }; class DexType { friend struct RedexContext; const DexString* m_name; // See UNIQUENESS above for the rationale for the private constructor pattern. 
explicit DexType(const DexString* dstring) { m_name = dstring; } public: DexType() = delete; DexType(DexType&&) = delete; DexType(const DexType&) = delete; // DexType retrieval/creation // If the DexType exists, return it, otherwise create it and return it. // See also get_type() static DexType* make_type(const DexString* dstring); static DexType* make_type(std::string_view str) { return make_type(DexString::make_string(str)); } // Always makes a new type that is unique. static DexType* make_unique_type(const std::string& type_name) { auto ret = DexString::make_string(type_name); for (uint32_t i = 0; get_type(ret); i++) { ret = DexString::make_string(type_name.substr(0, type_name.size() - 1) + "r$" + std::to_string(i) + ";"); } return make_type(ret); } // Return an existing DexType or nullptr if one does not exist. static DexType* get_type(const DexString* dstring); static DexType* get_type(std::string_view str) { return get_type(DexString::get_string(str)); } public: void set_name(const DexString* new_name); const DexString* get_name() const { return m_name; } const char* c_str() const { return get_name()->c_str(); } const std::string& str() const { return get_name()->str(); } DexProto* get_non_overlapping_proto(const DexString*, DexProto*); }; /* Non-optimizing DexSpec compliant ordering */ inline bool compare_dextypes(const DexType* a, const DexType* b) { return compare_dexstrings(a->get_name(), b->get_name()); } struct dextypes_comparator { bool operator()(const DexType* a, const DexType* b) const { return compare_dextypes(a, b); } }; /** * A DexFieldRef is a reference to a DexField. * A reference may or may not map to a definition. * Consider the following: * class A { public int i; } * class B extends A {} * B b = ...; * b.i = 0; * the code compiles to * iput v0, v1 LB;.i:I * B.i does not exist and it's a reference. * The type of the reference is effectively the scope where resolution starts. 
* DexFieldRef are never really materialized and everything is a DexField. * The API however returns DexFieldRef for references thus imposing some * kind of resolution to get to a definition if needed. */ class DexFieldRef { friend struct RedexContext; friend class DexClass; protected: DexFieldSpec m_spec; bool m_concrete; bool m_external; virtual ~DexFieldRef() {} DexFieldRef(DexType* container, const DexString* name, DexType* type) { m_spec.cls = container; m_spec.name = name; m_spec.type = type; m_concrete = false; m_external = false; } public: DexFieldRef() = delete; DexFieldRef(DexFieldRef&&) = delete; DexFieldRef(const DexFieldRef&) = delete; bool is_concrete() const { return m_concrete; } bool is_external() const { return m_external; } bool is_def() const { return is_concrete() || is_external(); } const DexField* as_def() const; DexField* as_def(); DexType* get_class() const { return m_spec.cls; } const DexString* get_name() const { return m_spec.name; } const char* c_str() const { return get_name()->c_str(); } const std::string& str() const { return get_name()->str(); } DexType* get_type() const { return m_spec.type; } template <typename C> void gather_types_shallow(C& ltype) const; void gather_strings_shallow(std::vector<const DexString*>& lstring) const; void gather_strings_shallow( std::unordered_set<const DexString*>& lstring) const; void change(const DexFieldSpec& ref, bool rename_on_collision = false); DexField* make_concrete(DexAccessFlags access_flags); DexField* make_concrete(DexAccessFlags access_flags, std::unique_ptr<DexEncodedValue> v); static void erase_field(DexFieldRef* f); // This method frees the given `DexFieldRed` - different from `erase_field`, // which removes the field from the `RedexContext`. // // BE SURE YOU REALLY WANT TO DO THIS! Many Redex passes and structures // currently cache references and do not clean up, including global ones. 
static void delete_field_DO_NOT_USE(DexFieldRef* f) { erase_field(f); delete f; } }; class DexField : public DexFieldRef { friend struct RedexContext; friend class DexFieldRef; /* Concrete method members */ DexAccessFlags m_access; std::unique_ptr<DexAnnotationSet> m_anno; std::unique_ptr<DexEncodedValue> m_value; /* Static Only */ std::string m_deobfuscated_name; // See UNIQUENESS above for the rationale for the private constructor pattern. DexField(DexType* container, const DexString* name, DexType* type); std::string self_show() const; // To avoid "Show.h" in the header. public: DexField() = delete; DexField(DexField&&) = delete; DexField(const DexField&) = delete; ~DexField(); ReferencedState rstate; // Tracks whether this field can be deleted or renamed // DexField retrieval/creation // If the DexField exists, return it, otherwise create it and return it. // See also get_field() static DexFieldRef* make_field(const DexType* container, const DexString* name, const DexType* type); // Return an existing DexField or nullptr if one does not exist. 
static DexFieldRef* get_field(const DexType* container, const DexString* name, const DexType* type); static DexFieldRef* get_field(const dex_member_refs::FieldDescriptorTokens&); /** * Get a field using a full descriptor: Lcls;.name:type */ static DexFieldRef* get_field(std::string_view); /** * Make a field using a full descriptor: Lcls;.name:type */ static DexFieldRef* make_field(std::string_view); static const DexString* get_unique_name(DexType* container, const DexString* name, DexType* type) { auto ret = name; for (uint32_t i = 0; get_field(container, ret, type); i++) { ret = DexString::make_string(name->str() + "r$" + std::to_string(i)); } return ret; } public: DexAnnotationSet* get_anno_set() const { return m_anno.get(); } DexEncodedValue* get_static_value() const { return m_value.get(); } DexAccessFlags get_access() const { always_assert(is_def()); return m_access; } void set_access(DexAccessFlags access) { always_assert_log(!m_external, "Unexpected external field %s\n", self_show().c_str()); m_access = access; } void set_external(); void set_deobfuscated_name(std::string name) { m_deobfuscated_name = std::move(name); } const std::string& get_deobfuscated_name() const { return m_deobfuscated_name; } const std::string& get_deobfuscated_name_or_empty() const { return m_deobfuscated_name; } // Return just the name of the field. 
std::string get_simple_deobfuscated_name() const; void set_value(std::unique_ptr<DexEncodedValue> v); std::unique_ptr<DexAnnotationSet> release_annotations(); void clear_annotations(); void attach_annotation_set(std::unique_ptr<DexAnnotationSet> aset); template <typename C> void gather_types(C& ltype) const; void gather_strings(std::vector<const DexString*>& lstring) const; void gather_strings(std::unordered_set<const DexString*>& lstring) const; template <typename C> void gather_fields(C& lfield) const; template <typename C> void gather_methods(C& lmethod) const; private: template <typename C> void gather_strings_internal(C& lstring) const; }; /* Non-optimizing DexSpec compliant ordering */ inline bool compare_dexfields(const DexFieldRef* a, const DexFieldRef* b) { if (a == nullptr) { return b != nullptr; } else if (b == nullptr) { return false; } if (a->get_class() != b->get_class()) { return compare_dextypes(a->get_class(), b->get_class()); } if (a->get_name() != b->get_name()) { return compare_dexstrings(a->get_name(), b->get_name()); } return compare_dextypes(a->get_type(), b->get_type()); } struct dexfields_comparator { bool operator()(const DexFieldRef* a, const DexFieldRef* b) const { return compare_dexfields(a, b); } }; class DexTypeList { public: using ContainerType = std::vector<DexType*>; using value_type = DexType*; using iterator = typename ContainerType::iterator; using const_iterator = typename ContainerType::const_iterator; const_iterator begin() const { return m_list.begin(); } const_iterator end() const { return m_list.end(); } size_t size() const { return m_list.size(); } bool empty() const { return m_list.empty(); } DexType* at(size_t i) const { return m_list.at(i); } // DexTypeList retrieval/creation // If the DexTypeList exists, return it, otherwise create it and return it. // See also get_type_list() static DexTypeList* make_type_list(ContainerType&& p); // Return an existing DexTypeList or nullptr if one does not exist. 
static DexTypeList* get_type_list(const ContainerType& p); /** * Returns size of the encoded typelist in bytes, input * pointer must be aligned. */ int encode(DexOutputIdx* dodx, uint32_t* output) const; friend bool operator<(const DexTypeList& a, const DexTypeList& b) { auto ita = a.m_list.begin(); auto itb = b.m_list.begin(); while (1) { if (itb == b.m_list.end()) return false; if (ita == a.m_list.end()) return true; if (*ita != *itb) { const DexType* ta = *ita; const DexType* tb = *itb; return compare_dextypes(ta, tb); } ita++; itb++; } } template <typename C> void gather_types(C& ltype) const; bool equals(const std::vector<DexType*>& vec) const { return std::equal(m_list.begin(), m_list.end(), vec.begin(), vec.end()); } DexTypeList* push_front(DexType* t) const; DexTypeList* pop_front() const; DexTypeList* pop_front(size_t n) const; DexTypeList* push_back(DexType* t) const; DexTypeList* push_back(const std::vector<DexType*>& t) const; DexTypeList* replace_head(DexType* new_head) const; private: // See UNIQUENESS above for the rationale for the private constructor pattern. explicit DexTypeList(ContainerType list) : m_list(std::move(list)) {} const ContainerType m_list; friend struct RedexContext; }; inline bool compare_dextypelists(const DexTypeList* a, const DexTypeList* b) { if (a == nullptr) { return b != nullptr; } else if (b == nullptr) { return false; } return *a < *b; } struct dextypelists_comparator { bool operator()(const DexTypeList* a, const DexTypeList* b) const { return compare_dextypelists(a, b); } }; class DexProto { friend struct RedexContext; DexTypeList* m_args; DexType* m_rtype; const DexString* m_shorty; // See UNIQUENESS above for the rationale for the private constructor pattern. 
DexProto(DexType* rtype, DexTypeList* args, const DexString* shorty) { m_rtype = rtype; m_args = args; m_shorty = shorty; } public: DexProto() = delete; DexProto(DexProto&&) = delete; DexProto(const DexProto&) = delete; // DexProto retrieval/creation // If the DexProto exists, return it, otherwise create it and return it. // See also get_proto() static DexProto* make_proto(const DexType* rtype, const DexTypeList* args, const DexString* shorty); static DexProto* make_proto(const DexType* rtype, const DexTypeList* args); // Return an existing DexProto or nullptr if one does not exist. static DexProto* get_proto(const DexType* rtype, const DexTypeList* args); public: DexType* get_rtype() const { return m_rtype; } DexTypeList* get_args() const { return m_args; } const DexString* get_shorty() const { return m_shorty; } bool is_void() const { return get_rtype() == DexType::make_type("V"); } template <typename C> void gather_types(C& ltype) const; void gather_strings(std::vector<const DexString*>& lstring) const; void gather_strings(std::unordered_set<const DexString*>& lstring) const; }; /* Non-optimizing DexSpec compliant ordering */ inline bool compare_dexprotos(const DexProto* a, const DexProto* b) { if (a == nullptr) { return b != nullptr; } else if (b == nullptr) { return false; } if (a->get_rtype() != b->get_rtype()) { return compare_dextypes(a->get_rtype(), b->get_rtype()); } return (*(a->get_args()) < *(b->get_args())); } struct dexprotos_comparator { bool operator()(const DexProto* a, const DexProto* b) const { return compare_dexprotos(a, b); } }; struct DebugLineItem { uint32_t offset; uint32_t line; DebugLineItem(uint32_t offset, uint32_t line) : offset(offset), line(line) {} }; /* * Dex files encode debug information as a series of opcodes. Internally, we * convert the opcodes that delta-encode position into absolute DexPositions. * The other opcodes get passed directly through. 
*/ enum class DexDebugEntryType { Instruction, Position }; struct DexDebugEntry final { DexDebugEntryType type; uint32_t addr; union { std::unique_ptr<DexPosition> pos; std::unique_ptr<DexDebugInstruction> insn; }; DexDebugEntry(uint32_t addr, std::unique_ptr<DexPosition> pos); DexDebugEntry(uint32_t addr, std::unique_ptr<DexDebugInstruction> insn); // should only be copied via DexDebugItem's copy ctor, which is responsible // for remapping DexPositions' parent pointer DexDebugEntry(const DexDebugEntry&) = delete; DexDebugEntry(DexDebugEntry&& other) noexcept; ~DexDebugEntry(); void gather_strings(std::vector<const DexString*>& lstring) const; void gather_types(std::vector<DexType*>& ltype) const; }; class DexDebugItem { std::vector<DexDebugEntry> m_dbg_entries; uint32_t m_on_disk_size{0}; uint32_t m_source_checksum{0}; uint32_t m_source_offset{0}; DexDebugItem(DexIdx* idx, uint32_t offset); public: DexDebugItem() = default; DexDebugItem(const DexDebugItem&); static std::unique_ptr<DexDebugItem> get_dex_debug(DexIdx* idx, uint32_t offset); public: std::vector<DexDebugEntry>& get_entries() { return m_dbg_entries; } const auto& get_entries() const { return m_dbg_entries; } void set_entries(std::vector<DexDebugEntry> dbg_entries) { m_dbg_entries.swap(dbg_entries); } uint32_t get_line_start() const; uint32_t get_on_disk_size() const { return m_on_disk_size; } uint32_t get_source_checksum() const { return m_source_checksum; } uint32_t get_source_offset() const { return m_source_offset; } void bind_positions(DexMethod* method, const DexString* file); /* Returns number of bytes encoded, *output has no alignment requirements */ static int encode( DexOutputIdx* dodx, uint8_t* output, uint32_t line_start, uint32_t num_params, const std::vector<std::unique_ptr<DexDebugInstruction>>& dbgops); void gather_types(std::vector<DexType*>& ltype) const; void gather_strings(std::vector<const DexString*>& lstring) const; }; std::vector<std::unique_ptr<DexDebugInstruction>> 
generate_debug_instructions( DexDebugItem* debugitem, PositionMapper* pos_mapper, uint32_t* line_start, std::vector<DebugLineItem>* line_info, uint32_t line_addin); using DexCatches = std::vector<std::pair<DexType*, uint32_t>>; struct DexTryItem { uint32_t m_start_addr; uint16_t m_insn_count; DexCatches m_catches; DexTryItem(uint32_t start_addr, uint32_t insn_count) : m_start_addr(start_addr) { always_assert_log(insn_count <= std::numeric_limits<uint16_t>::max(), "too many instructions in a single try region %d > 2^16", insn_count); m_insn_count = insn_count; } }; class IRCode; class DexCode { friend class DexMethod; uint16_t m_registers_size; uint16_t m_ins_size; uint16_t m_outs_size; std::optional<std::vector<DexInstruction*>> m_insns{std::nullopt}; std::vector<std::unique_ptr<DexTryItem>> m_tries; std::unique_ptr<DexDebugItem> m_dbg; public: static std::unique_ptr<DexCode> get_dex_code(DexIdx* idx, uint32_t offset); // TODO: make it private and find a better way to allow code creation DexCode() : m_registers_size(0), m_ins_size(0), m_outs_size(0), m_insns(std::vector<DexInstruction*>()), m_dbg(nullptr) {} DexCode(const DexCode&); ~DexCode(); public: const DexDebugItem* get_debug_item() const { return m_dbg.get(); } void set_debug_item(std::unique_ptr<DexDebugItem> dbg) { m_dbg = std::move(dbg); } DexDebugItem* get_debug_item() { return m_dbg.get(); } std::unique_ptr<DexDebugItem> release_debug_item() { return std::move(m_dbg); } std::vector<DexInstruction*> release_instructions() { redex_assert(m_insns); auto ret = std::move(*m_insns); m_insns = std::nullopt; return ret; } std::vector<DexInstruction*>& reset_instructions() { m_insns = std::vector<DexInstruction*>{}; return *m_insns; } std::vector<DexInstruction*>& get_instructions() { redex_assert(m_insns); return *m_insns; } const std::vector<DexInstruction*>& get_instructions() const { redex_assert(m_insns); return *m_insns; } void set_instructions(std::vector<DexInstruction*> insns) { 
m_insns.emplace(std::move(insns)); } std::vector<std::unique_ptr<DexTryItem>>& get_tries() { return m_tries; } const std::vector<std::unique_ptr<DexTryItem>>& get_tries() const { return m_tries; } uint16_t get_registers_size() const { return m_registers_size; } uint16_t get_ins_size() const { return m_ins_size; } uint16_t get_outs_size() const { return m_outs_size; } void set_registers_size(uint16_t sz) { m_registers_size = sz; } void set_ins_size(uint16_t sz) { m_ins_size = sz; } void set_outs_size(uint16_t sz) { m_outs_size = sz; } /* * Returns number of bytes in encoded output, passed in * pointer must be aligned. Does not encode debugitem, * that must be done later. */ int encode(DexOutputIdx* dodx, uint32_t* output); /* * Returns the number of 2-byte code units needed to encode all the * instructions. */ uint32_t size() const; friend std::string show(const DexCode*); }; /** * A DexMethodRef is a reference to a DexMethod. * A reference may or may not map to a definition. * Consider the following: * class A { public void m() {} } * class B extends A {} * B b = ...; * b.m(); * the code compiles to * invoke-virtual {v0} LB;.m:()V * B.m() does not exist and it's a reference. * The type of the reference is effectively the scope where resolution starts. * DexMethodRef are never really materialized and everything is a DexMethod. * The API however returns DexMethodRef for references thus imposing some * kind of resolution to get to a definition if needed. 
*/ class DexMethodRef { friend struct RedexContext; friend class DexClass; protected: DexMethodSpec m_spec; bool m_concrete; bool m_external; ~DexMethodRef() {} DexMethodRef(DexType* type, const DexString* name, DexProto* proto) : m_spec(type, name, proto) { m_concrete = false; m_external = false; } public: DexMethodRef() = delete; DexMethodRef(DexMethodRef&&) = delete; DexMethodRef(const DexMethodRef&) = delete; bool is_concrete() const { return m_concrete; } bool is_external() const { return m_external; } bool is_def() const { return is_concrete() || is_external(); } const DexMethod* as_def() const; DexMethod* as_def(); DexType* get_class() const { return m_spec.cls; } const DexString* get_name() const { return m_spec.name; } const char* c_str() const { return get_name()->c_str(); } const std::string& str() const { return get_name()->str(); } DexProto* get_proto() const { return m_spec.proto; } template <typename C> void gather_types_shallow(C& ltype) const; void gather_strings_shallow(std::vector<const DexString*>& lstring) const; void gather_strings_shallow( std::unordered_set<const DexString*>& lstring) const; void change(const DexMethodSpec& ref, bool rename_on_collision); DexMethod* make_concrete(DexAccessFlags, std::unique_ptr<DexCode>, bool is_virtual); DexMethod* make_concrete(DexAccessFlags, std::unique_ptr<IRCode>, bool is_virtual); DexMethod* make_concrete(DexAccessFlags access, bool is_virtual); // This only removes the given method reference from the `RedexContext`, but // does not free the method. static void erase_method(DexMethodRef* mref); }; class DexMethod : public DexMethodRef { friend struct RedexContext; friend class DexMethodRef; /* Concrete method members */ // Place these first to avoid/fill padding from DexMethodRef. 
bool m_virtual{false}; DexAccessFlags m_access; std::unique_ptr<DexAnnotationSet> m_anno; std::unique_ptr<DexCode> m_dex_code; std::unique_ptr<IRCode> m_code; std::unique_ptr<ParamAnnotations> m_param_anno; const DexString* m_deobfuscated_name{nullptr}; // See UNIQUENESS above for the rationale for the private constructor pattern. DexMethod(DexType* type, const DexString* name, DexProto* proto); ~DexMethod(); // For friend classes to use with smart pointers. struct Deleter { void operator()(DexMethod* m) { delete m; } }; std::string self_show() const; // To avoid "Show.h" in the header. public: DexMethod() = delete; DexMethod(DexMethodRef&&) = delete; DexMethod(const DexMethodRef&) = delete; // Tracks whether this method can be deleted or renamed ReferencedState rstate; // DexMethod retrieval/creation // If the DexMethod exists, return it, otherwise create it and return it. // See also get_method() static DexMethodRef* make_method(const DexType* type, const DexString* name, const DexProto* proto); static DexMethodRef* make_method(const DexMethodSpec& spec); /** * Create a copy of method `that`. This excludes `rstate`. */ static DexMethod* make_method_from(DexMethod* that, DexType* target_cls, const DexString* name); // Make a copy of method `that`, including the `rstate`. static DexMethod* make_full_method_from(DexMethod* that, DexType* target_cls, const DexString* name); /** * This creates everything along the chain of Dex<Member>, so it should * be used for members that either exist or would be created anyway. 
*/ static DexMethodRef* make_method(const char* cls_name, const char* meth_name, const char* rtype_str, const std::vector<const char*>& arg_strs) { DexType* cls = DexType::make_type(cls_name); auto* name = DexString::make_string(meth_name); DexType* rtype = DexType::make_type(rtype_str); DexTypeList::ContainerType args; for (auto const arg_str : arg_strs) { DexType* arg = DexType::make_type(arg_str); args.push_back(arg); } DexTypeList* dtl = DexTypeList::make_type_list(std::move(args)); return make_method(cls, name, DexProto::make_proto(rtype, dtl)); } /** * Creates a method reference from its signature given as a collection of * strings. */ static DexMethodRef* make_method(const std::string& class_type, const std::string& name, std::initializer_list<std::string> arg_types, const std::string& return_type); static DexMethodRef* get_method( const dex_member_refs::MethodDescriptorTokens&); /** * Get a method using a full descriptor: Lcls;.name:(args)rtype * * When `kCheckFormat` = true, syntactical issues in the string * will lead to asserts, i.e., throws. */ template <bool kCheckFormat = false> static DexMethodRef* get_method(std::string_view); /** * Make a method using a full descriptor: Lcls;.name:(args)rtype */ static DexMethodRef* make_method(std::string_view); // Return an existing DexMethod or nullptr if one does not exist. 
static DexMethodRef* get_method(const DexType* type, const DexString* name, const DexProto* proto); static DexMethodRef* get_method(const DexMethodSpec& spec); static const DexString* get_unique_name(DexType* type, const DexString* name, DexProto* proto) { auto ret = name; for (uint32_t i = 0; get_method(type, ret, proto); i++) { ret = DexString::make_string(name->str() + "r$" + std::to_string(i)); } return ret; } public: const DexAnnotationSet* get_anno_set() const { return m_anno.get(); } DexAnnotationSet* get_anno_set() { return m_anno.get(); } const DexCode* get_dex_code() const { return m_dex_code.get(); } DexCode* get_dex_code() { return m_dex_code.get(); } IRCode* get_code() { return m_code.get(); } const IRCode* get_code() const { return m_code.get(); } std::unique_ptr<IRCode> release_code(); bool is_virtual() const { return m_virtual; } DexAccessFlags get_access() const { always_assert(is_def()); return m_access; } const ParamAnnotations* get_param_anno() const { return m_param_anno.get(); } ParamAnnotations* get_param_anno() { return m_param_anno.get(); } std::unique_ptr<ParamAnnotations> release_param_anno(); void set_deobfuscated_name(const std::string& name); void set_deobfuscated_name(const DexString* name); void set_deobfuscated_name(const DexString& name); const DexString& get_deobfuscated_name() const { redex_assert(m_deobfuscated_name != nullptr); return *m_deobfuscated_name; } const DexString* get_deobfuscated_name_or_null() const { return m_deobfuscated_name; } const std::string& get_deobfuscated_name_or_empty() const { if (m_deobfuscated_name == nullptr) { return DexString::EMPTY; ; } return m_deobfuscated_name->str(); } // Return just the name of the method. std::string get_simple_deobfuscated_name() const; // Return a really fully deobfuscated name, even for a generated method. // TODO(redex): this can be removed now. 
std::string get_fully_deobfuscated_name() const; void set_access(DexAccessFlags access) { always_assert_log(!m_external, "Unexpected external method %s\n", self_show().c_str()); m_access = access; } void set_virtual(bool is_virtual) { always_assert_log(!m_external, "Unexpected external method %s\n", self_show().c_str()); m_virtual = is_virtual; } void set_external(); void set_dex_code(std::unique_ptr<DexCode> code) { m_dex_code = std::move(code); } void set_code(std::unique_ptr<IRCode> code); void make_non_concrete(); void become_virtual(); std::unique_ptr<DexAnnotationSet> release_annotations(); void clear_annotations(); /** * Note that this is to combine annotation for two methods that should * have same set of parameters. This is used in vertical merging when * merging parent and child's inherited method. If you want to use this * method you should check if their protos are the same before using this. */ void combine_annotations_with(DexMethod* other); void add_load_params(size_t num_add_loads); void attach_annotation_set(std::unique_ptr<DexAnnotationSet> aset); void attach_param_annotation_set(int paramno, std::unique_ptr<DexAnnotationSet> aset); template <typename C> void gather_types(C& ltype) const; template <typename C> void gather_fields(C& lfield) const; template <typename C> void gather_methods(C& lmethod) const; template <typename C> void gather_methods_from_annos(C& lmethod) const; void gather_strings(std::vector<const DexString*>& lstring, bool exclude_loads = false) const; void gather_strings(std::unordered_set<const DexString*>& lstring, bool exclude_loads = false) const; template <typename C> void gather_callsites(C& lcallsite) const; template <typename C> void gather_methodhandles(C& lmethodhandle) const; void gather_init_classes(std::vector<DexType*>& ltype) const; /* * DexCode <-> IRCode conversion methods. 
* * In general DexCode is only used in the load / output phases, and in tests * when we wish to verify that we have generated specific instructions. * * Most operations can and should use IRCode. Optimizations should never * have to call sync(). */ void balloon(); void sync(); // This method frees the given `DexMethod` - different from `erase_method`, // which removes the method from the `RedexContext`. // // BE SURE YOU REALLY WANT TO DO THIS! Many Redex passes and structures // currently cache references and do not clean up, including global ones like // `MethodProfiles` which maps `DexMethodRef`s to data. static void delete_method_DO_NOT_USE(DexMethod* method) { delete method; } // This method currently does *NOT* free the `DexMethod`, as there may still // be references. This may will free most resources associated with the // DexMethod, though. Eventually this will become a full delete. static void delete_method(DexMethod* method); private: template <typename C> void gather_strings_internal(C& lstring, bool exclude_loads) const; }; using dexcode_to_offset = std::unordered_map<DexCode*, uint32_t>; class DexLocation { friend struct RedexContext; private: std::string m_store_name; std::string m_file_name; DexLocation(std::string m_store_name, std::string m_file_name); public: // If the DexLocation exists, return it, otherwise create it and return // it. See also get_() static const DexLocation* make_location(std::string_view store_name, std::string_view file_name); // Return an existing DexLocation or nullptr if one does not exist. static const DexLocation* get_location(std::string_view store_name, std::string_view file_name); const std::string& get_store_name() const { return m_store_name; } // Returns the location of this class - can be dex/jar file. 
const std::string& get_file_name() const { return m_file_name; } }; class DexClass { private: DexType* m_super_class; DexType* m_self; DexTypeList* m_interfaces; const DexString* m_source_file; std::unique_ptr<DexAnnotationSet> m_anno; const DexString* m_deobfuscated_name{nullptr}; const DexLocation* m_location{nullptr}; std::vector<DexField*> m_sfields; std::vector<DexField*> m_ifields; std::vector<DexMethod*> m_dmethods; std::vector<DexMethod*> m_vmethods; DexAccessFlags m_access_flags; bool m_external; bool m_perf_sensitive; explicit DexClass(const DexLocation* location); void load_class_annotations(DexIdx* idx, uint32_t anno_off); void load_class_data_item(DexIdx* idx, uint32_t cdi_off, std::unique_ptr<DexEncodedValueArray> svalues); friend struct ClassCreator; // This constructor is private on purpose, use DexClass::create instead DexClass(DexIdx* idx, const dex_class_def* cdef, const DexLocation* location); std::string self_show() const; // To avoid "Show.h" in the header. public: ReferencedState rstate; ~DexClass(); // May return nullptr on benign duplicate class static DexClass* create(DexIdx* idx, const dex_class_def* cdef, const DexLocation* location); const std::vector<DexMethod*>& get_dmethods() const { return m_dmethods; } std::vector<DexMethod*>& get_dmethods() { always_assert_log(!m_external, "Unexpected external class %s\n", self_show().c_str()); return m_dmethods; } const std::vector<DexMethod*>& get_vmethods() const { return m_vmethods; } std::vector<DexMethod*>& get_vmethods() { always_assert_log(!m_external, "Unexpected external class %s\n", self_show().c_str()); return m_vmethods; } std::vector<DexMethod*> get_all_methods() const; /* Gets the clinit method, aka the class initializer method. * * Unlike constructors, there's only ever one clinit method. * It takes no arguments and returns void. 
*/ DexMethod* get_clinit() const { for (auto meth : get_dmethods()) { if (strcmp(meth->get_name()->c_str(), "<clinit>") == 0) { return meth; } } return nullptr; } std::vector<DexMethod*> get_ctors() const { std::vector<DexMethod*> ctors; for (auto meth : get_dmethods()) { if (strcmp(meth->get_name()->c_str(), "<init>") == 0) { ctors.push_back(meth); } } return ctors; } bool has_ctors() const { // TODO: There must be a logarithmic approach to this. dmethods are sorted! return !get_ctors().empty(); } void add_method(DexMethod* m); // Removes the method from this class void remove_method(const DexMethod* m); // Remove the method from the class and delete the definition. void remove_method_definition(DexMethod* m); const std::vector<DexField*>& get_sfields() const { return m_sfields; } std::vector<DexField*>& get_sfields() { redex_assert(!m_external); return m_sfields; } const std::vector<DexField*>& get_ifields() const { return m_ifields; } std::vector<DexField*>& get_ifields() { redex_assert(!m_external); return m_ifields; } std::vector<DexField*> get_all_fields() const; void add_field(DexField* f); // Removes the field from this class void remove_field(const DexField* f); // Remove the field from the class and delete the definition. 
void remove_field_definition(DexField* f); DexField* find_ifield(const char* name, const DexType* field_type) const; DexField* find_sfield(const char* name, const DexType* field_type) const; DexAnnotationDirectory* get_annotation_directory(); DexAccessFlags get_access() const { return m_access_flags; } DexType* get_super_class() const { return m_super_class; } DexType* get_type() const { return m_self; } const DexString* get_name() const { return m_self->get_name(); } const char* c_str() const { return get_name()->c_str(); } const std::string& str() const { return get_name()->str(); } DexTypeList* get_interfaces() const { return m_interfaces; } const DexString* get_source_file() const { return m_source_file; } bool has_class_data() const; bool is_def() const { return true; } bool is_external() const { return m_external; } std::unique_ptr<DexEncodedValueArray> get_static_values(); const DexAnnotationSet* get_anno_set() const { return m_anno.get(); } DexAnnotationSet* get_anno_set() { return m_anno.get(); } void attach_annotation_set(std::unique_ptr<DexAnnotationSet> anno); void set_source_file(const DexString* source_file) { m_source_file = source_file; } /** * This also adds `name` as an alias for this DexType in the g_redex global * type map. */ void set_deobfuscated_name(const std::string& name); void set_deobfuscated_name(const DexString* name); void set_deobfuscated_name(const DexString& name); const DexString& get_deobfuscated_name() const { redex_assert(m_deobfuscated_name != nullptr); return *m_deobfuscated_name; } const DexString* get_deobfuscated_name_or_null() const { return m_deobfuscated_name; } const std::string& get_deobfuscated_name_or_empty() const { if (m_deobfuscated_name == nullptr) { return DexString::EMPTY; } return m_deobfuscated_name->str(); } // Retrieves the (original) location. 
const DexLocation* get_location() const { return m_location; }

  // Overwrites the class access flags (m_access_flags) after asserting, via
  // always_assert_log, that the class is not external.
  void set_access(DexAccessFlags access) {
    always_assert_log(!m_external,
                      "Unexpected external class %s\n",
                      self_show().c_str());
    m_access_flags = access;
  }

  void set_external();

  // Overwrites the super class (m_super_class); asserts the class is not
  // external.
  void set_super_class(DexType* super_class) {
    always_assert_log(!m_external,
                      "Unexpected external class %s\n",
                      self_show().c_str());
    m_super_class = super_class;
  }

  void combine_annotations_with(DexClass* other);

  // Overwrites the interface list (m_interfaces); asserts the class is not
  // external.
  void set_interfaces(DexTypeList* intfs) {
    always_assert_log(!m_external,
                      "Unexpected external class %s\n",
                      self_show().c_str());
    m_interfaces = intfs;
  }

  void clear_annotations();

  /* Encodes class_data_item, returns size in bytes. No
   * alignment requirements on *output */
  int encode(DexOutputIdx* dodx, dexcode_to_offset& dco, uint8_t* output);

  // Gather methods. Per the "Gather methods" note at the top of this file,
  // the templated variants are defined outside the header and only
  // std::vector / std::unordered_set containers are supported.
  template <typename C>
  void gather_types(C& ltype) const;
  void gather_strings(std::vector<const DexString*>& lstring,
                      bool exclude_loads = false) const;
  void gather_strings(std::unordered_set<const DexString*>& lstring,
                      bool exclude_loads = false) const;
  template <typename C>
  void gather_fields(C& lfield) const;
  template <typename C>
  void gather_methods(C& lmethod) const;
  template <typename C>
  void gather_callsites(C& lcallsite) const;
  template <typename C>
  void gather_methodhandles(C& lmethodhandle) const;

  void gather_load_types(std::unordered_set<DexType*>& ltype) const;
  void gather_init_classes(std::vector<DexType*>& ltype) const;

  // Whether to optimize for perf, instead of space.
  // This bit is only set by the InterDex pass and not available earlier.
  bool is_perf_sensitive() const { return m_perf_sensitive; }
  void set_perf_sensitive(bool value) { m_perf_sensitive = value; }

  // Find methods and fields from a class using its obfuscated name.
  // NOTE(review): the two declarations that follow are named
  // `find_*_from_simple_deobfuscated_name`; confirm whether "obfuscated"
  // above should read "deobfuscated".
DexField* find_field_from_simple_deobfuscated_name( const std::string& field_name); DexMethod* find_method_from_simple_deobfuscated_name( const std::string& method_name); private: void sort_methods(); void sort_fields(); template <typename C> void gather_strings_internal(C& lstring, bool exclude_loads) const; }; inline bool compare_dexclasses(const DexClass* a, const DexClass* b) { return compare_dextypes(a->get_type(), b->get_type()); } struct dexclasses_comparator { bool operator()(const DexClass* a, const DexClass* b) const { return compare_dexclasses(a, b); } }; using DexClasses = std::vector<DexClass*>; using DexClassesVector = std::vector<DexClasses>; /* Non-optimizing DexSpec compliant ordering */ inline bool compare_dexmethods(const DexMethodRef* a, const DexMethodRef* b) { if (a == nullptr) { return b != nullptr; } else if (b == nullptr) { return false; } if (a->get_class() != b->get_class()) { return compare_dextypes(a->get_class(), b->get_class()); } if (a->get_name() != b->get_name()) { return compare_dexstrings(a->get_name(), b->get_name()); } return compare_dexprotos(a->get_proto(), b->get_proto()); } struct dexmethods_comparator { bool operator()(const DexMethodRef* a, const DexMethodRef* b) const { return compare_dexmethods(a, b); } }; /** * Return the DexClass that represents the DexType in input or nullptr if * no such DexClass exists. */ DexClass* type_class(const DexType* t); /** * Return the DexClass that represents an internal DexType or nullptr if * no such DexClass exists. */ inline DexClass* type_class_internal(const DexType* t) { auto dc = type_class(t); if (dc == nullptr || dc->is_external()) return nullptr; return dc; } /** * For a set of classes, compute all referenced strings, types, fields and * methods, such that components are sorted and unique. 
*/ void gather_components(std::vector<const DexString*>& lstring, std::vector<DexType*>& ltype, std::vector<DexFieldRef*>& lfield, std::vector<DexMethodRef*>& lmethod, std::vector<DexCallSite*>& lcallsite, std::vector<DexMethodHandle*>& lmethodhandle, const DexClasses& classes, bool exclude_loads = false); DISALLOW_DEFAULT_COMPARATOR(DexClass) DISALLOW_DEFAULT_COMPARATOR(DexCode) DISALLOW_DEFAULT_COMPARATOR(DexDebugInstruction) DISALLOW_DEFAULT_COMPARATOR(DexDebugItem) DISALLOW_DEFAULT_COMPARATOR(DexFieldRef) DISALLOW_DEFAULT_COMPARATOR(DexField) DISALLOW_DEFAULT_COMPARATOR(DexMethodRef) DISALLOW_DEFAULT_COMPARATOR(DexMethod) DISALLOW_DEFAULT_COMPARATOR(DexOutputIdx) DISALLOW_DEFAULT_COMPARATOR(DexProto) DISALLOW_DEFAULT_COMPARATOR(DexString) DISALLOW_DEFAULT_COMPARATOR(DexType) DISALLOW_DEFAULT_COMPARATOR(DexTypeList)