libredex/ReachableClasses.cpp (594 lines of code) (raw):

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "ReachableClasses.h"

#include <boost/filesystem.hpp>
#include <chrono>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <mutex>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "ClassHierarchy.h"
#include "DexClass.h"
#include "FbjniMarker.h"
#include "Match.h"
#include "RedexResources.h"
#include "ReflectionAnalysis.h"
#include "Show.h"
#include "StringUtil.h"
#include "Trace.h"
#include "TypeSystem.h"
#include "Walkers.h"

namespace {

using namespace reflection;

/*
 * Dispatches on the member type T (DexField* or DexMethod*) so that callers
 * can visit all members of one kind with a single callable. The primary
 * template is intentionally empty; only the two specializations below provide
 * iterate().
 */
template <typename T, typename F>
struct DexItemIter {};

/*
 * Field flavour: calls yield on every static field, then on every instance
 * field of cls. External classes are skipped entirely.
 */
template <typename F>
struct DexItemIter<DexField*, F> {
  static void iterate(DexClass* cls, F& yield) {
    if (cls->is_external()) {
      return;
    }
    for (auto* field : cls->get_sfields()) {
      yield(field);
    }
    for (auto* field : cls->get_ifields()) {
      yield(field);
    }
  }
};

/*
 * Method flavour: calls yield on every direct method, then on every virtual
 * method of cls. External classes are skipped entirely.
 */
template <typename F>
struct DexItemIter<DexMethod*, F> {
  static void iterate(DexClass* cls, F& yield) {
    if (cls->is_external()) {
      return;
    }
    for (auto* method : cls->get_dmethods()) {
      yield(method);
    }
    for (auto* method : cls->get_vmethods()) {
      yield(method);
    }
  }
};

/*
 * Prevent a class from being deleted due to its being referenced via
 * reflection. :reflecting_method is the method containing the reflection site.
*/ void blocklist_field(DexMethod* reflecting_method, DexType* type, const DexString* name, bool declared) { auto* cls = type_class(type); if (cls == nullptr) { return; } auto yield = [&](DexField* t) { if (t->get_name() != name) { return; } if (!is_public(t) && !declared) { return; } TRACE(PGR, 4, "SRA BLOCK_LIST: %s", SHOW(t)); t->rstate.set_root(keep_reason::REFLECTION, reflecting_method); }; DexItemIter<DexField*, decltype(yield)>::iterate(cls, yield); if (!declared) { auto super_cls = cls->get_super_class(); if (super_cls != nullptr) { blocklist_field(reflecting_method, super_cls, name, declared); } } } void blocklist_method(DexMethod* reflecting_method, DexType* type, const DexString* name, const boost::optional<std::vector<DexType*>>& params, bool declared) { auto* cls = type_class(type); if (cls == nullptr) { return; } auto yield = [&](DexMethod* t) { if (t->get_name() != name) { return; } if (params != boost::none && !t->get_proto()->get_args()->equals(*params)) { return; } if (!is_public(t) && !declared) { return; } TRACE(PGR, 4, "SRA BLOCK_LIST: %s", SHOW(t)); t->rstate.set_root(keep_reason::REFLECTION, reflecting_method); }; DexItemIter<DexMethod*, decltype(yield)>::iterate(cls, yield); if (!declared) { auto super_cls = cls->get_super_class(); if (super_cls != nullptr) { blocklist_method(reflecting_method, super_cls, name, params, declared); } } } void analyze_reflection(const Scope& scope) { enum ReflectionType { GET_FIELD, GET_DECLARED_FIELD, GET_METHOD, GET_DECLARED_METHOD, GET_CONSTRUCTOR, GET_DECLARED_CONSTRUCTOR, INT_UPDATER, LONG_UPDATER, REF_UPDATER, }; const auto JAVA_LANG_CLASS = "Ljava/lang/Class;"; const auto ATOMIC_INT_FIELD_UPDATER = "Ljava/util/concurrent/atomic/AtomicIntegerFieldUpdater;"; const auto ATOMIC_LONG_FIELD_UPDATER = "Ljava/util/concurrent/atomic/AtomicLongFieldUpdater;"; const auto ATOMIC_REF_FIELD_UPDATER = "Ljava/util/concurrent/atomic/AtomicReferenceFieldUpdater;"; const std::unordered_map<std::string, 
std::unordered_map<std::string, ReflectionType>> refls = { {JAVA_LANG_CLASS, { {"getField", GET_FIELD}, {"getDeclaredField", GET_DECLARED_FIELD}, {"getMethod", GET_METHOD}, {"getDeclaredMethod", GET_DECLARED_METHOD}, {"getConstructor", GET_CONSTRUCTOR}, {"getConstructors", GET_CONSTRUCTOR}, {"getDeclaredConstructor", GET_DECLARED_CONSTRUCTOR}, {"getDeclaredConstructors", GET_DECLARED_CONSTRUCTOR}, }}, {ATOMIC_INT_FIELD_UPDATER, { {"newUpdater", INT_UPDATER}, }}, {ATOMIC_LONG_FIELD_UPDATER, { {"newUpdater", LONG_UPDATER}, }}, {ATOMIC_REF_FIELD_UPDATER, { {"newUpdater", REF_UPDATER}, }}, }; auto dex_string_lookup = [](const ReflectionAnalysis& analysis, ReflectionType refl_type, IRInstruction* insn) { if (refl_type == GET_CONSTRUCTOR || refl_type == GET_DECLARED_CONSTRUCTOR) { return DexString::get_string("<init>"); } int arg_str_idx = refl_type == ReflectionType::REF_UPDATER ? 2 : 1; auto arg_str = analysis.get_abstract_object(insn->src(arg_str_idx), insn); if (arg_str && arg_str->obj_kind == AbstractObjectKind::STRING) { return arg_str->dex_string; } else { return (const DexString*)nullptr; } }; reflection::MetadataCache refl_metadata_cache; std::mutex mutation_mutex; walk::parallel::code(scope, [&](DexMethod* method, IRCode& code) { std::unique_ptr<ReflectionAnalysis> analysis = nullptr; for (auto& mie : InstructionIterable(code)) { IRInstruction* insn = mie.insn; if (!opcode::is_an_invoke(insn->opcode())) { continue; } // See if it matches something in refls auto& method_name = insn->get_method()->get_name()->str(); auto& method_class_name = insn->get_method()->get_class()->get_name()->str(); auto method_map = refls.find(method_class_name); if (method_map == refls.end()) { continue; } auto refl_entry = method_map->second.find(method_name); if (refl_entry == method_map->second.end()) { continue; } ReflectionType refl_type = refl_entry->second; // Instantiating the analysis object also runs the reflection analysis // on the method. 
So, we wait until we're sure we need it. // We use a unique_ptr so that we'll still only have one per method. if (!analysis) { analysis = std::make_unique<ReflectionAnalysis>( /* dex_method */ method, /* context (interprocedural only) */ nullptr, /* summary_query_fn (interprocedural only) */ nullptr, /* metadata_cache */ &refl_metadata_cache); } auto arg_cls = analysis->get_abstract_object(insn->src(0), insn); if (!arg_cls || arg_cls->obj_kind != AbstractObjectKind::CLASS) { continue; } // Deal with methods that take a varying number of arguments. auto arg_str_value = dex_string_lookup(*analysis, refl_type, insn); if (arg_str_value == nullptr) { continue; } boost::optional<std::vector<DexType*>> param_types = boost::none; if (refl_type == GET_METHOD || refl_type == GET_CONSTRUCTOR || refl_type == GET_DECLARED_METHOD || refl_type == GET_DECLARED_CONSTRUCTOR) { param_types = analysis->get_method_params(insn); } // Grab a lock before making any changes to avoid race conditions. All // code above is read-only and runs in parallel std::lock_guard<std::mutex> l(mutation_mutex); TRACE(PGR, 4, "SRA ANALYZE: %s: type:%d %s.%s cls: %d %s %s str: %s", insn->get_method()->get_name()->str().c_str(), refl_type, method_class_name.c_str(), method_name.c_str(), arg_cls->obj_kind, SHOW(arg_cls->dex_type), SHOW(arg_cls->dex_string), SHOW(arg_str_value)); switch (refl_type) { case GET_FIELD: blocklist_field(method, arg_cls->dex_type, arg_str_value, false); break; case GET_DECLARED_FIELD: blocklist_field(method, arg_cls->dex_type, arg_str_value, true); break; case GET_METHOD: case GET_CONSTRUCTOR: blocklist_method(method, arg_cls->dex_type, arg_str_value, param_types, false); break; case GET_DECLARED_METHOD: case GET_DECLARED_CONSTRUCTOR: blocklist_method(method, arg_cls->dex_type, arg_str_value, param_types, true); break; case INT_UPDATER: case LONG_UPDATER: case REF_UPDATER: blocklist_field(method, arg_cls->dex_type, arg_str_value, true); break; } } }); } /** * Indicates that a class 
is being used via reflection. * * Examples: * * Bar.java: * Object x = Class.forName("com.facebook.Foo").newInstance(); * * MyGreatLayout.xml: * <com.facebook.MyTerrificView /> */ void mark_reachable_by_classname(DexClass* dclass) { if (dclass == nullptr) return; dclass->rstate.ref_by_string(); // When we mark a class as reachable, we also mark all fields and methods as // reachable. Eventually we will be smarter about this, which will allow us // to remove unused methods and fields. for (DexMethod* dmethod : dclass->get_dmethods()) { dmethod->rstate.ref_by_string(); } for (DexMethod* vmethod : dclass->get_vmethods()) { vmethod->rstate.ref_by_string(); } for (DexField* sfield : dclass->get_sfields()) { sfield->rstate.ref_by_string(); } for (DexField* ifield : dclass->get_ifields()) { ifield->rstate.ref_by_string(); } } void mark_reachable_by_native(const DexType* dtype) { auto dclass = type_class_internal(dtype); if (dclass == nullptr) { return; } dclass->rstate.set_keepnames(keep_reason::NATIVE); for (DexMethod* dmethod : dclass->get_dmethods()) { dmethod->rstate.set_keepnames(keep_reason::NATIVE); } for (DexMethod* vmethod : dclass->get_vmethods()) { vmethod->rstate.set_keepnames(keep_reason::NATIVE); } for (DexField* sfield : dclass->get_sfields()) { sfield->rstate.set_keepnames(keep_reason::NATIVE); } for (DexField* ifield : dclass->get_ifields()) { ifield->rstate.set_keepnames(keep_reason::NATIVE); } } void mark_reachable_by_string(DexMethod* method) { if (method == nullptr) { return; } if (auto cls = type_class_internal(method->get_class())) { cls->rstate.ref_by_string(); } method->rstate.ref_by_string(); } void mark_reachable_by_classname(DexType* dtype) { mark_reachable_by_classname(type_class_internal(dtype)); } // Possible methods for an android:onClick accept 1 argument that is a View. 
// Source: // https://android.googlesource.com/platform/frameworks/base/+/android-8.0.0_r15/core/java/android/view/View.java#5331 // Returns true if it matches that criteria, and it's in the set of known // attribute values. bool matches_onclick_method(const DexMethod* dmethod, const std::set<std::string>& names_to_keep) { auto prototype = dmethod->get_proto(); auto args_list = prototype->get_args(); if (args_list->size() == 1) { auto first_type = args_list->at(0); if (strcmp(first_type->c_str(), "Landroid/view/View;") == 0) { std::string method_name = dmethod->c_str(); return names_to_keep.count(method_name) > 0; } } return false; } // Simulates aapt's generated keep statements for any View which has an // android:onClick="foo" attribute. // Example (from aapt): // -keepclassmembers class * { *** foo(...); } // // This version however is much more specific, since keeping every method "foo" // is overkill. We only need to keep methods "foo" defined on a subclass of // android.content.Context that accept 1 argument (an android.view.View). void mark_onclick_attributes_reachable( const Scope& scope, const std::set<std::string>& onclick_attribute_values) { if (onclick_attribute_values.empty()) { return; } auto type_context = DexType::get_type("Landroid/content/Context;"); always_assert(type_context != nullptr); auto class_hierarchy = build_type_hierarchy(scope); auto children = get_all_children(class_hierarchy, type_context); for (const auto& t : children) { auto dclass = type_class(t); if (dclass->is_external()) { continue; } // Methods are invoked via reflection. Only public methods are relevant. 
for (const auto& m : dclass->get_vmethods()) { if (matches_onclick_method(m, onclick_attribute_values)) { TRACE(PGR, 2, "Keeping vmethod %s due to onClick attribute in XML.", SHOW(m)); m->rstate.set_referenced_by_resource_xml(); } } } } DexClass* maybe_class_from_string(const std::string& classname) { auto dtype = DexType::get_type(classname.c_str()); if (dtype == nullptr) { return nullptr; } auto dclass = type_class(dtype); if (dclass == nullptr) { return nullptr; } return dclass; } void mark_manifest_root(const std::string& classname) { auto dclass = maybe_class_from_string(classname); if (dclass == nullptr) { TRACE(PGR, 3, "Dangling reference from manifest: %s", classname.c_str()); return; } TRACE(PGR, 3, "manifest: %s", classname.c_str()); dclass->rstate.set_root(keep_reason::MANIFEST); for (DexMethod* dmethod : dclass->get_ctors()) { dmethod->rstate.set_root(keep_reason::MANIFEST); } } /* * We mark an <activity>'s referenced class as reachable only if it is exported * or has intent filters. Exported Activities may be called from other apps, so * we must treat them as entry points. Activities with intent filters can be * called via implicit intents, and it is difficult to statically determine * which Activity an implicit intent will resolve to, so we treat all potential * recipient Activities as always reachable. For more details, see: * * https://developer.android.com/guide/topics/manifest/activity-element * https://developer.android.com/guide/components/intents-filters * * Note 1: Every Activity must be registered in the manifest before it can be * invoked by an intent (both explicit and implicit ones). Since our class * renamer isn't currently able to rewrite class names in the manifest, we mark * all Activities as non-obfuscatable. * * Note 2: RMU may delete some of the Activities that we haven't marked as entry * points. However, it currently doesn't know how to rewrite the manifest to * remove the corresponding <activity> tags. 
This seems benign: the Android * runtime appears to be OK with these dangling references. * * Addendum: The other component tags are also governed by the exported * attribute as well as by intent filters, but I (jezng) am not sure if those * are sufficient to statically determine their reachability, so I am taking the * conservative approach. This may be worth revisiting. */ void analyze_reachable_from_manifest( const std::string& apk_dir, const std::unordered_set<std::string>& prune_unexported_components_str) { std::unordered_map<std::string, ComponentTag> string_to_tag{ {"activity", ComponentTag::Activity}, {"activity-alias", ComponentTag::ActivityAlias}}; std::unordered_set<ComponentTag, EnumClassHash> prune_unexported_components; for (const auto& s : prune_unexported_components_str) { prune_unexported_components.emplace(string_to_tag.at(s)); } auto manifest_class_info = [&apk_dir]() { try { auto resources = create_resource_reader(apk_dir); return resources->get_manifest_class_info(); } catch (const std::exception& e) { std::cerr << "Error reading manifest: " << e.what() << std::endl; return ManifestClassInfo{}; } }(); for (const auto& classname : manifest_class_info.application_classes) { mark_manifest_root(classname); } for (const auto& classname : manifest_class_info.instrumentation_classes) { mark_manifest_root(classname); } for (const auto& tag_info : manifest_class_info.component_tags) { switch (tag_info.tag) { case ComponentTag::Activity: case ComponentTag::ActivityAlias: { if (tag_info.is_exported == BooleanXMLAttribute::True || tag_info.has_intent_filters || !prune_unexported_components.count(tag_info.tag)) { mark_manifest_root(tag_info.classname); } else { TRACE(PGR, 3, "%s not exported", tag_info.classname.c_str()); auto dclass = maybe_class_from_string(tag_info.classname); if (dclass) { dclass->rstate.set_keepnames(); } } break; } case ComponentTag::Receiver: case ComponentTag::Service: { mark_manifest_root(tag_info.classname); break; } case 
ComponentTag::Provider: { mark_manifest_root(tag_info.classname); for (const auto& classname : tag_info.authority_classes) { mark_manifest_root(classname); } break; } } } } void mark_reachable_by_xml(const std::string& classname) { auto dclass = maybe_class_from_string(classname); if (dclass == nullptr) { return; } // Setting "referenced_by_resource_xml" essentially behaves like keep, // though breaking it out to its own flag will let us clear/recompute this. dclass->rstate.set_referenced_by_resource_xml(); // Mark the constructors as used, which should be the expected use case from // layout inflation. for (DexMethod* dmethod : dclass->get_ctors()) { dmethod->rstate.set_referenced_by_resource_xml(); } } // 1) Marks classes (Fragments, Views) found in XML layouts as reachable along // with their constructors. // 2) Marks candidate methods that could be called via android:onClick // attributes. void analyze_reachable_from_xml_layouts(const Scope& scope, const std::string& apk_dir) { std::unordered_set<std::string> layout_classes; std::unordered_set<std::string> attrs_to_read; // Method names used by reflection attrs_to_read.emplace(ONCLICK_ATTRIBUTE); std::unordered_multimap<std::string, std::string> attribute_values; auto resources = create_resource_reader(apk_dir); resources->collect_layout_classes_and_attributes( attrs_to_read, &layout_classes, &attribute_values); for (const std::string& classname : layout_classes) { TRACE(PGR, 3, "xml_layout: %s", classname.c_str()); mark_reachable_by_xml(classname); } auto attr_values = multimap_values_to_set(attribute_values, ONCLICK_ATTRIBUTE); mark_onclick_attributes_reachable(scope, attr_values); } // Set is_serde to be true for all JSON serializer and deserializer classes // that extend any one of supercls_names. 
void initialize_reachable_for_json_serde( const Scope& scope, const std::vector<std::string>& supercls_names) { std::unordered_set<const DexType*> serde_superclses; for (auto& cls_name : supercls_names) { const DexType* supercls = DexType::get_type(cls_name); if (supercls) { serde_superclses.emplace(supercls); } } if (serde_superclses.empty()) { return; } ClassHierarchy ch = build_type_hierarchy(scope); for (auto* serde_supercls : serde_superclses) { for (auto* child : get_all_children(ch, serde_supercls)) { type_class(child)->rstate.set_is_serde(); } } } void keep_methods(const Scope& scope, const std::vector<std::string>& ms) { std::set<std::string> methods_to_keep(ms.begin(), ms.end()); for (const auto* cls : scope) { for (auto* m : cls->get_dmethods()) { if (methods_to_keep.count(m->get_name()->c_str())) { m->rstate.ref_by_string(); } } for (auto* m : cls->get_vmethods()) { if (methods_to_keep.count(m->get_name()->c_str())) { m->rstate.ref_by_string(); } } } } /* * Returns true iff this class or any of its super classes are in the set of * classes banned due to use of complex reflection. */ bool in_reflected_pkg(DexClass* dclass, std::unordered_set<DexClass*>& reflected_pkg_classes) { if (dclass == nullptr) { // Not in our dex files return false; } if (reflected_pkg_classes.count(dclass)) { return true; } return in_reflected_pkg(type_class_internal(dclass->get_super_class()), reflected_pkg_classes); } /** * Mark serializable class's non-serializable super class's no arg constructor * as root. 
*/ void analyze_serializable(const Scope& scope) { DexType* serializable = DexType::get_type("Ljava/io/Serializable;"); if (!serializable) { return; } TypeSet children; get_all_implementors(scope, serializable, children); for (auto* child : children) { DexClass* child_cls = type_class(child); DexType* child_super_type = child_cls->get_super_class(); DexClass* child_supercls = type_class(child_super_type); if (!child_supercls || child_supercls->is_external()) { continue; } // We should keep the no argument constructors of the superclasses of // any Serializable class, if they are themselves not Serializable. if (!children.count(child_super_type)) { for (auto meth : child_supercls->get_dmethods()) { if (method::is_init(meth) && meth->get_proto()->get_args()->size() == 0) { meth->rstate.set_root(keep_reason::SERIALIZABLE); } } } } } } // namespace /* * Initializes list of classes that are reachable via reflection, and calls * or from code. * * These include: * - Classes used in the manifest (e.g. 
activities, services, etc) * - View or Fragment classes used in layouts * - Classes that are in certain packages (specified in the reflected_packages * section of the config) and classes that extend from them * - Classes reachable from native libraries */ void init_reachable_classes(const Scope& scope, const ReachableClassesConfig& config) { { Timer t{"Mark keep-methods"}; std::vector<std::string> methods; keep_methods(scope, config.keep_methods); } if (!config.apk_dir.empty()) { if (config.compute_xml_reachability) { Timer t{"Computing XML reachability"}; // Classes present in manifest analyze_reachable_from_manifest(config.apk_dir, config.prune_unexported_components); // Classes present in XML layouts analyze_reachable_from_xml_layouts(scope, config.apk_dir); } if (config.analyze_native_lib_reachability) { Timer t{"Computing native reachability"}; // Classnames present in native libraries (lib/*/*.so) auto resources = create_resource_reader(config.apk_dir); for (const std::string& classname : resources->get_native_classes()) { auto type = DexType::get_type(classname.c_str()); if (type == nullptr) continue; TRACE(PGR, 3, "native_lib: %s", classname.c_str()); mark_reachable_by_classname(type); mark_reachable_by_native(type); } if (!config.fbjni_json_files.empty()) { mark_native_classes_from_fbjni_configs(config.fbjni_json_files); } } walk::methods(scope, [&](DexMethod* meth) { // These were probably already marked by the native lib reachability // analysis above, but just to be doubly sure... 
if (is_native(meth)) { TRACE(PGR, 3, "native_method: %s", SHOW(meth->get_class())); mark_reachable_by_string(meth); meth->rstate.set_keepnames(keep_reason::NATIVE); } }); } { Timer t{"Analyzing reflection"}; analyze_reflection(scope); std::unordered_set<DexClass*> reflected_package_classes; for (auto clazz : scope) { const char* cname = clazz->get_type()->get_name()->c_str(); for (const auto& pkg : config.reflected_package_names) { if (starts_with(cname, pkg.c_str())) { reflected_package_classes.insert(clazz); continue; } } } for (auto clazz : scope) { if (in_reflected_pkg(clazz, reflected_package_classes)) { reflected_package_classes.insert(clazz); /* Note: * Some of these are by string, others by type * but we have no way in the config to distinguish * them currently. So, we mark with the most * conservative sense here. */ TRACE(PGR, 3, "reflected_package: %s", SHOW(clazz)); mark_reachable_by_classname(clazz); } } } { Timer t{"Analyzing Serializable"}; analyze_serializable(scope); } { Timer t{"Initializing for json serde"}; initialize_reachable_for_json_serde(scope, config.json_serde_supercls); } } void recompute_reachable_from_xml_layouts(const Scope& scope, const std::string& apk_dir) { walk::parallel::classes(scope, [](DexClass* cls) { cls->rstate.unset_referenced_by_resource_xml(); for (auto* method : cls->get_dmethods()) { method->rstate.unset_referenced_by_resource_xml(); } for (auto* method : cls->get_vmethods()) { method->rstate.unset_referenced_by_resource_xml(); } for (auto* field : cls->get_ifields()) { field->rstate.unset_referenced_by_resource_xml(); } for (auto* field : cls->get_sfields()) { field->rstate.unset_referenced_by_resource_xml(); } }); analyze_reachable_from_xml_layouts(scope, apk_dir); } std::string ReferencedState::str() const { std::ostringstream s; s << inner_struct.m_by_string; s << inner_struct.m_by_resources; s << inner_struct.m_is_serde; s << inner_struct.m_keep; s << allowshrinking(); s << allowobfuscation(); s << 
inner_struct.m_assumenosideeffects; s << inner_struct.m_whyareyoukeeping; return s.str(); }