libredex/RedexContext.h (229 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include <array> #include <boost/functional/hash.hpp> #include <cstring> #include <deque> #include <functional> #include <limits> #include <list> #include <map> #include <mutex> #include <sstream> #include <string_view> #include <unordered_map> #include <vector> #include "ConcurrentContainers.h" #include "Debug.h" #include "DexMemberRefs.h" #include "FrequentlyUsedPointersCache.h" class DexCallSite; class DexClass; class DexLocation; class DexDebugInstruction; class DexField; class DexFieldRef; class DexMethod; class DexMethodHandle; class DexMethodRef; class DexProto; class DexString; class DexType; class DexTypeList; class PositionPatternSwitchManager; struct DexDebugEntry; struct DexFieldSpec; struct DexPosition; struct RedexContext; namespace keep_rules { struct AssumeReturnValue; } // namespace keep_rules extern RedexContext* g_redex; #if defined(__SSE4_2__) && defined(__linux__) && defined(__STRCMP_LESS__) extern "C" bool strcmp_less(const char* str1, const char* str2); #endif struct RedexContext { explicit RedexContext(bool allow_class_duplicates = false); ~RedexContext(); const DexString* make_string(std::string_view s); const DexString* get_string(std::string_view s); DexType* make_type(const DexString* dstring); DexType* get_type(const DexString* dstring); /** * Change the name of a type, but do not remove the old name from the mapping */ void set_type_name(DexType* type, const DexString* new_name); /** * Add an additional name to refer to a type (a deobfuscated name for example) */ void alias_type_name(DexType* type, const DexString* new_name); /** * Remove a name -> type entry from the map */ void remove_type_name(const DexString* name); DexFieldRef* make_field(const DexType* container, const DexString* name, const DexType* type); DexFieldRef* get_field(const DexType* container, const DexString* name, const DexType* type); /** * Add an additional name to refer to a field (a deobfuscated name for * example) */ void alias_field_name(DexFieldRef* field, const DexString* new_name); void erase_field(DexFieldRef*); void erase_field(const DexType* container, const DexString* name, const DexType* type); void mutate_field(DexFieldRef* field, const DexFieldSpec& ref, bool rename_on_collision); using DexTypeListContainerType = std::vector<DexType*>; DexTypeList* make_type_list(DexTypeListContainerType&& p); DexTypeList* get_type_list(const DexTypeListContainerType& p); DexProto* make_proto(const DexType* rtype, const DexTypeList* args, const DexString* shorty); DexProto* get_proto(const DexType* rtype, const DexTypeList* args); DexMethodRef* make_method(const DexType* type, const DexString* name, const DexProto* proto); DexMethodRef* get_method(const DexType* type, const DexString* name, const DexProto* proto); /** * Add an additional name to refer to a method (a deobfuscated name for * example) */ void alias_method_name(DexMethodRef* method, const DexString* new_name); DexMethodHandle* make_methodhandle(); DexMethodHandle* get_methodhandle(); void erase_method(DexMethodRef*); void erase_method(const DexType* type, const DexString* name, const DexProto* proto); void mutate_method(DexMethodRef* method, const DexMethodSpec& new_spec, bool rename_on_collision); DexLocation* make_location(std::string_view store_name, std::string_view file_name); DexLocation* get_location(std::string_view store_name, std::string_view file_name); PositionPatternSwitchManager* get_position_pattern_switch_manager(); // Return false on unique classes // Return true on benign duplicate classes // Throw RedexException on problematic duplicate classes bool class_already_loaded(DexClass* cls); void publish_class(DexClass* cls); DexClass* type_class(const DexType* t); template <class TypeClassWalkerFn = void(const DexType*, const DexClass*)> void walk_type_class(TypeClassWalkerFn walker) { for (const auto& type_cls : m_type_to_class) { walker(type_cls.first, type_cls.second); } } const std::vector<DexClass*>& external_classes() const { return m_external_classes; } // Add a lambda to be called when RedexContext is destructed. This is // especially useful for resetting caches/singletons in tests. using Task = std::function<void(void)>; void add_destruction_task(const Task& t); static constexpr bool kDebugPointersCacheLoad = false; void load_pointers_cache() { m_pointers_cache.load(); m_pointers_cache_loaded = true; } const FrequentlyUsedPointers& pointers_cache() { if (!m_pointers_cache_loaded) { redex_assert(!kDebugPointersCacheLoad); std::lock_guard<std::mutex> lock(m_pointers_cache_lock); load_pointers_cache(); } return m_pointers_cache; } // Set and return field values keep_rules::AssumeReturnValue provided by // proguard rules. void set_field_value(DexField* field, keep_rules::AssumeReturnValue& val); keep_rules::AssumeReturnValue* get_field_value(DexField* field); void unset_field_value(DexField* field); // Set and return method's keep_rules::AssumeReturnValue provided by proguard // rules. void set_return_value(DexMethod* method, keep_rules::AssumeReturnValue& val); keep_rules::AssumeReturnValue* get_return_value(DexMethod* method); void unset_return_value(DexMethod* method); size_t num_sb_interaction_indices() const { return m_sb_interaction_indices.size(); } size_t get_sb_interaction_index(const std::string& interaction) const { auto it = m_sb_interaction_indices.find(interaction); if (it == m_sb_interaction_indices.end()) { return std::numeric_limits<size_t>::max(); } return it->second; } const std::unordered_map<std::string, size_t>& get_sb_interaction_indices() const { return m_sb_interaction_indices; } void set_sb_interaction_index( const std::unordered_map<std::string, size_t>& input); // This is for convenience. bool instrument_mode{false}; private: struct Strcmp; struct TruncatedStringHash; // Hashing is expensive on large strings (long Java type names, string // literals), so we avoid using `std::unordered_map` directly. // // For leaf-level storage we use `std::map` (i.e., a tree). In a sparse // string keyset with large keys this performs better as only the suffix // until first change needs to be compared. // // For sharding, we use two layers. The first layer is a partial string // hash as defined by `TruncatedStringHash`. It picks a segment "close" // to the front and performs reasonably well. A std::array is used for // sharding here (see `LargeStringMap`). // // The second layer optimizes the string comparison. We have additional // data besides the string data pointer, namely the UTF size. We can // avoid comparisons for different string lengths. The second layer // thus shards over it. We use the `ConcurrentContainer` sharding for // this (see `ConcurrentProjectedStringMap`). // // The two layers give infrastructure overhead, however, the base size // of a `std::map` and `ConcurrentContainer` is quite small. using StringMapKey = std::string_view; struct StringMapKeyHash { size_t operator()(const StringMapKey& k) const { return k.size(); } }; template <size_t n_slots = 31> using ConcurrentProjectedStringMap = ConcurrentMapContainer<std::map<std::string_view, const DexString*>, StringMapKey, const DexString*, StringMapKeyHash, Identity, n_slots>; template <size_t n_slots, size_t m_slots> struct LargeStringMap { using AType = std::array<ConcurrentProjectedStringMap<n_slots>, m_slots>; AType map; ConcurrentProjectedStringMap<n_slots>& at(const StringMapKey& k) { size_t hashed = TruncatedStringHash()(k.data(), k.size()) % m_slots; return map[hashed]; } typename AType::iterator begin() { return map.begin(); } typename AType::iterator end() { return map.end(); } }; // Hash a 32-byte subsequence of a given string, offset by 32 bytes from the // start. Dex files tend to contain many strings with the same prefixes, // because every class / method under a given package will share the same // prefix. The offset ensures that we have more unique subsequences to hash. // // An offset of 32 and hash prefix length of 32 seemed to perform best on the // typical strings in an android app. It's important to remain within one // cache line (offset + hash_prefix_len <= 64) and hash enough of the string // to minimize the chance of duplicate sections struct TruncatedStringHash { size_t operator()(const char* s, uint32_t string_size) { constexpr size_t hash_prefix_len = 32; constexpr size_t offset = 32; size_t len = std::min<size_t>(string_size, offset + hash_prefix_len); size_t start = std::max<int64_t>(0, int64_t(len - hash_prefix_len)); return boost::hash_range(s + start, s + len); } }; // DexString LargeStringMap<31, 127> s_string_map; // DexType ConcurrentMap<const DexString*, DexType*> s_type_map; // DexFieldRef ConcurrentMap<DexFieldSpec, DexFieldRef*> s_field_map; std::mutex s_field_lock; // DexTypeList struct DexTypeListContainerTypePtrHash { size_t operator()(const DexTypeListContainerType* d) const { return boost::hash<DexTypeListContainerType>()(*d); } }; struct DexTypeListContainerTypePtrEquals { size_t operator()(const DexTypeListContainerType* lhs, const DexTypeListContainerType* rhs) const { return lhs == rhs || *lhs == *rhs; } }; ConcurrentMap<const DexTypeListContainerType*, DexTypeList*, DexTypeListContainerTypePtrHash, DexTypeListContainerTypePtrEquals> s_typelist_map; // DexProto using ProtoKey = std::pair<const DexType*, const DexTypeList*>; ConcurrentMap<ProtoKey, DexProto*, boost::hash<ProtoKey>> s_proto_map; // DexMethod ConcurrentMap<DexMethodSpec, DexMethodRef*> s_method_map; std::mutex s_method_lock; // DexLocation using ClassLocationKey = std::pair<std::string_view, std::string_view>; struct ClassLocationKeyHash { size_t operator()(const ClassLocationKey& k) const { return std::hash<std::string_view>()(k.second); } }; ConcurrentMap<ClassLocationKey, DexLocation*, ClassLocationKeyHash> s_location_map; // DexPositionSwitch and DexPositionPattern PositionPatternSwitchManager* m_position_pattern_switch_manager{nullptr}; // Type-to-class map std::mutex m_type_system_mutex; std::unordered_map<const DexType*, DexClass*> m_type_to_class; std::vector<DexClass*> m_external_classes; const std::vector<const DexType*> m_empty_types; // These functions will be called when ~RedexContext() is called std::mutex m_destruction_tasks_lock; std::vector<Task> m_destruction_tasks; std::unordered_map<std::string, size_t> m_sb_interaction_indices; bool m_allow_class_duplicates; bool m_pointers_cache_loaded{false}; std::mutex m_pointers_cache_lock; FrequentlyUsedPointers m_pointers_cache; // Field values map specified by Proguard assume value ConcurrentMap<DexField*, std::unique_ptr<keep_rules::AssumeReturnValue>> field_values; // Return values map specified by Proguard assume value ConcurrentMap<DexMethod*, std::unique_ptr<keep_rules::AssumeReturnValue>> method_return_values; };