opt/singleimpl/SingleImplAnalyze.cpp (440 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include <memory> #include <string> #include <unordered_map> #include <unordered_set> #include <vector> #include "Debug.h" #include "DexOutput.h" #include "DexStore.h" #include "DexUtil.h" #include "ReachableClasses.h" #include "Resolver.h" #include "Show.h" #include "SingleImpl.h" #include "SingleImplDefs.h" #include "StlUtil.h" #include "Trace.h" #include "Walkers.h" struct AnalysisImpl : SingleImplAnalysis { AnalysisImpl(const Scope& scope, const ProguardMap& pg_map, const DexStoresVector& stores) : scope(scope), pg_map(pg_map), xstores(stores) {} void create_single_impl(const TypeMap& single_impl, const TypeSet& intfs, const SingleImplConfig& config); void collect_field_defs(); void collect_method_defs(); void analyze_opcodes(); void escape_cross_stores(); void remove_escaped(); private: DexType* get_and_check_single_impl(DexType* type); void collect_children(const TypeSet& intfs); void check_impl_hierarchy(); void escape_with_clinit(); void escape_with_sfields(); void filter_single_impl(const SingleImplConfig& config); void filter_proguard_special_interface(); void filter_do_not_strip(); void filter_list(const std::vector<std::string>& list, bool keep_match); void filter_by_annotations(const std::vector<std::string>& blocklist); private: const Scope& scope; const ProguardMap& pg_map; XStoreRefs xstores; }; /** * Get the single impl if the type is a single impl or an array of it. * When an array mark the single impl as having an array type. * Return nullptr otherwise. */ DexType* AnalysisImpl::get_and_check_single_impl(DexType* type) { if (single_impls.count(type)) { return type; } if (type::is_array(type)) { auto element_type = type::get_array_element_type(type); redex_assert(element_type); const auto sit = single_impls.find(element_type); if (sit != single_impls.end()) { escape_interface(sit->first, HAS_ARRAY_TYPE); return sit->first; } } return nullptr; } /** * Find all single implemented interfaces. */ void AnalysisImpl::create_single_impl(const TypeMap& single_impl, const TypeSet& intfs, const SingleImplConfig& config) { for (auto const& intf_it : single_impl) { auto intf = intf_it.first; auto intf_cls = type_class(intf); always_assert(intf_cls && !intf_cls->is_external()); if (is_annotation(intf_cls)) continue; auto impl = intf_it.second; auto impl_cls = type_class(impl); always_assert(impl_cls && !impl_cls->is_external()); if (is_annotation(impl_cls)) continue; single_impls[intf].cls = impl; } collect_children(intfs); check_impl_hierarchy(); escape_with_clinit(); escape_with_sfields(); filter_single_impl(config); filter_do_not_strip(); } /** * Filter common function for both white and black list. */ void AnalysisImpl::filter_list(const std::vector<std::string>& list, bool keep_match) { if (list.empty()) return; auto find_in_list = [&](const std::string& name) { for (const std::string& el_name : list) { if (name.compare(0, el_name.size(), el_name) == 0) { return true; } } return false; }; for (const auto& intf_it : single_impls) { const auto intf = intf_it.first; const auto intf_cls = type_class(intf); const std::string& intf_name = intf_cls->get_deobfuscated_name_or_empty(); bool match = find_in_list(intf_name); if (match && keep_match) continue; if (!match && !keep_match) continue; escape_interface(intf, FILTERED); } } void AnalysisImpl::filter_proguard_special_interface() { for (const auto& intf_it : single_impls) { const auto intf = intf_it.first; const auto intf_cls = type_class(intf); const std::string& intf_name = intf_cls->get_deobfuscated_name_or_empty(); if (pg_map.is_special_interface(intf_name)) { escape_interface(intf, FILTERED); } } } void AnalysisImpl::filter_by_annotations( const std::vector<std::string>& blocklist) { std::unordered_set<DexType*> anno_types; for (const auto& s : blocklist) { auto ty = DexType::get_type(s.c_str()); if (ty != nullptr) { anno_types.emplace(ty); } } for (const auto& intf_it : single_impls) { const auto intf = intf_it.first; const auto intf_cls = type_class(intf); if (has_anno(intf_cls, anno_types)) { escape_interface(intf, FILTERED); } } } /** * Apply filters to the set of single impl found. * White lists come first, then black lists. */ void AnalysisImpl::filter_single_impl(const SingleImplConfig& config) { filter_list(config.allowlist, true); filter_list(config.package_allowlist, true); filter_list(config.blocklist, false); filter_list(config.package_blocklist, false); filter_by_annotations(config.anno_blocklist); // TODO(T33109158): Better way to eliminate VerifyError. if (config.filter_proguard_special_interfaces) { filter_proguard_special_interface(); } } /** * Do not optimize DoNotStrip interfaces. */ void AnalysisImpl::filter_do_not_strip() { for (const auto& intf_it : single_impls) { if (!can_delete(type_class(intf_it.first))) { escape_interface(intf_it.first, DO_NOT_STRIP); } } walk::methods(scope, [this](DexMethod* method) { if (root(method)) { for (auto arg_type : *method->get_proto()->get_args()) { if (single_impls.count(arg_type)) { escape_interface(arg_type, DO_NOT_STRIP); } } if (single_impls.count(method->get_class())) { escape_interface(method->get_class(), DO_NOT_STRIP); } } }); walk::fields(scope, [this](DexField* field) { if (root(field)) { if (single_impls.count(field->get_type())) { escape_interface(field->get_type(), DO_NOT_STRIP); } if (single_impls.count(field->get_class())) { escape_interface(field->get_class(), DO_NOT_STRIP); } } }); } /** * Collect direct children of interfaces. */ void AnalysisImpl::collect_children(const TypeSet& intfs) { for (auto& intf : intfs) { for (auto super : *type_class(intf)->get_interfaces()) { auto super_it = single_impls.find(super); if (super_it != single_impls.end()) { super_it->second.children.insert(intf); } } } } /** * Escape if any parent is not known to redex. */ void AnalysisImpl::check_impl_hierarchy() { for (auto& intf_it : single_impls) { if (!klass::has_hierarchy_in_scope(type_class(intf_it.second.cls))) { escape_interface(intf_it.first, IMPL_PARENT_ESCAPED); } } } /** * Escape interfaces with static initializer. */ void AnalysisImpl::escape_with_clinit() { for (auto& intf_it : single_impls) { // strictly speaking this is not checking for a clinit but it's all the // same. Interfaces should not have static methods and even if so we // just escape them. From our analysis it turns out there are few with // clinit only and as expected none with static methods. if (!type_class(intf_it.first)->get_dmethods().empty()) { escape_interface(intf_it.first, CLINIT); } } } /** * Escape interfaces with static fields. Also escape the type of the field * if it is a single impl. * Interface fields may not be scoped to the interface itself and resolved * at runtime. So until we have an analysis that can spot those cases we * give up on interfaces with fields and the type of the field if it is * a single impl. */ void AnalysisImpl::escape_with_sfields() { for (auto const& intf_it : single_impls) { auto intf_cls = type_class(intf_it.first); redex_assert(intf_cls->get_ifields().empty()); always_assert(!intf_cls->is_external()); const auto& sfields = intf_cls->get_sfields(); if (sfields.empty()) continue; escape_interface(intf_it.first, HAS_SFIELDS); for (auto sfield : sfields) { auto ftype = sfield->get_class(); auto simpl = get_and_check_single_impl(ftype); if (simpl) { escape_interface(simpl, HAS_SFIELDS); } } } } /** * If an interface in a store brings a class in a later store drop the * optimization. */ void AnalysisImpl::escape_cross_stores() { for (auto const& intf_it : single_impls) { if (xstores.illegal_ref(intf_it.first, intf_it.second.cls)) { escape_interface(intf_it.first, CROSS_STORES); continue; } // Be conservative: it is possible that the class has cross-store // references itself. Replacing the interface might increase the // chances of that blowing up. auto cls = type_class(intf_it.second.cls); if (cls != nullptr) { if (xstores.illegal_ref_load_types(intf_it.first, cls)) { escape_interface(intf_it.first, CROSS_STORES); static bool warned = false; if (!warned) { warned = true; TRACE(INTF, 0, "Found transitive cross store violation! For details, run with " "TRACE=INTF:1."); } TRACE(INTF, 1, "Warning: found %s which is by itself not a cross-store " "violation for %s but depends on other types that are!", SHOW(cls), SHOW(intf_it.first)); continue; } } } } /** * Clean up the single impl map. */ void AnalysisImpl::remove_escaped() { std20::erase_if(single_impls, [](auto& p) { return p.second.is_escaped(); }); } /** * Find all fields typed with the single impl interface. */ void AnalysisImpl::collect_field_defs() { walk::fields(scope, [&](DexField* field) { auto type = field->get_type(); auto intf = get_and_check_single_impl(type); if (intf) { single_impls[intf].fielddefs.push_back(field); } }); } /** * Find all methods with a single impl interface in their signature. * Also if a method with the interface in the signature is native mark the * interface as "escaped". */ void AnalysisImpl::collect_method_defs() { auto check_method_arg = [&](DexType* type, DexMethod* method, bool native) { auto intf = get_and_check_single_impl(type); if (!intf) return; if (native) { escape_interface(intf, NATIVE_METHOD); } single_impls[intf].methoddefs.insert(method); }; walk::methods(scope, [&](DexMethod* method) { auto proto = method->get_proto(); bool native = is_native(method); check_method_arg(proto->get_rtype(), method, native); for (const auto it : *proto->get_args()) { check_method_arg(it, method, native); } }); } /** * Find all opcodes that reference a single implemented interface in a typeref, * fieldref or methodref. */ void AnalysisImpl::analyze_opcodes() { auto check_arg = [&](DexMethod* referrer, const IRList::iterator& insn_it, DexType* type, DexMethodRef* meth, IRInstruction* insn) { auto intf = get_and_check_single_impl(type); if (intf) { auto& si = single_impls.at(intf); std::lock_guard<std::mutex> lock(si.mutex); si.referencing_methods[referrer][insn] = insn_it; si.methodrefs[meth].insert(insn); } }; auto check_sig = [&](DexMethod* referrer, const IRList::iterator& insn_it, DexMethodRef* meth, IRInstruction* insn) { // check the sig for single implemented interface const auto proto = meth->get_proto(); check_arg(referrer, insn_it, proto->get_rtype(), meth, insn); for (const auto arg : *proto->get_args()) { check_arg(referrer, insn_it, arg, meth, insn); } }; auto check_field = [&](DexMethod* referrer, const IRList::iterator& insn_it, DexFieldRef* field, IRInstruction* insn) { auto cls = field->get_class(); cls = get_and_check_single_impl(cls); if (cls) { escape_interface(cls, HAS_FIELD_REF); } const auto type = field->get_type(); auto intf = get_and_check_single_impl(type); if (intf) { auto& si = single_impls.at(intf); std::lock_guard<std::mutex> lock(si.mutex); si.referencing_methods[referrer][insn] = insn_it; si.fieldrefs[field].push_back(insn); } }; walk::parallel::code(scope, [&](DexMethod* method, IRCode& code) { redex_assert(!code.editable_cfg_built()); // Need *one* way to auto ii = ir_list::InstructionIterable(code); const auto& end = ii.end(); for (auto it = ii.begin(); it != end; ++it) { auto insn = it->insn; auto op = insn->opcode(); switch (op) { // type ref case OPCODE_CONST_CLASS: case OPCODE_CHECK_CAST: case OPCODE_INSTANCE_OF: case OPCODE_NEW_INSTANCE: case OPCODE_NEW_ARRAY: case OPCODE_FILLED_NEW_ARRAY: { auto intf = get_and_check_single_impl(insn->get_type()); if (intf) { auto& si = single_impls.at(intf); std::lock_guard<std::mutex> lock(si.mutex); si.referencing_methods[method][insn] = it.unwrap(); si.typerefs.push_back(insn); } break; } // field ref case OPCODE_IGET: case OPCODE_IGET_WIDE: case OPCODE_IGET_OBJECT: case OPCODE_IPUT: case OPCODE_IPUT_WIDE: case OPCODE_IPUT_OBJECT: { DexFieldRef* field = resolve_field(insn->get_field(), FieldSearch::Instance); if (field == nullptr) { field = insn->get_field(); } check_field(method, it.unwrap(), field, insn); break; } case OPCODE_SGET: case OPCODE_SGET_WIDE: case OPCODE_SGET_OBJECT: case OPCODE_SPUT: case OPCODE_SPUT_WIDE: case OPCODE_SPUT_OBJECT: { DexFieldRef* field = resolve_field(insn->get_field(), FieldSearch::Static); if (field == nullptr) { field = insn->get_field(); } check_field(method, it.unwrap(), field, insn); break; } // method ref case OPCODE_INVOKE_INTERFACE: { // if it is an invoke on the interface method, collect it as such const auto meth = insn->get_method(); const auto owner = meth->get_class(); const auto intf = get_and_check_single_impl(owner); if (intf) { // if the method ref is not defined on the interface // itself drop the optimization const auto& meths = type_class(intf)->get_vmethods(); if (std::find(meths.begin(), meths.end(), meth) == meths.end()) { escape_interface(intf, UNKNOWN_MREF); } else { auto& si = single_impls.at(intf); std::lock_guard<std::mutex> lock(si.mutex); si.referencing_methods[method][insn] = it.unwrap(); si.intf_methodrefs[meth].insert(insn); } } check_sig(method, it.unwrap(), meth, insn); break; } case OPCODE_INVOKE_DIRECT: case OPCODE_INVOKE_STATIC: case OPCODE_INVOKE_VIRTUAL: case OPCODE_INVOKE_SUPER: { const auto meth = insn->get_method(); check_sig(method, it.unwrap(), meth, insn); break; } default: break; } } }); } /** * Main analysis method */ std::unique_ptr<SingleImplAnalysis> SingleImplAnalysis::analyze( const Scope& scope, const DexStoresVector& stores, const TypeMap& single_impl, const TypeSet& intfs, const ProguardMap& pg_map, const SingleImplConfig& config) { std::unique_ptr<AnalysisImpl> single_impls( new AnalysisImpl(scope, pg_map, stores)); single_impls->create_single_impl(single_impl, intfs, config); single_impls->collect_field_defs(); single_impls->collect_method_defs(); single_impls->analyze_opcodes(); single_impls->escape_cross_stores(); single_impls->remove_escaped(); return std::move(single_impls); } void SingleImplAnalysis::escape_interface(DexType* intf, EscapeReason reason) { auto sit = single_impls.find(intf); if (sit == single_impls.end()) return; if (sit->second.escape & reason) return; std::lock_guard<std::mutex> lock(sit->second.mutex); sit->second.escape |= reason; TRACE(INTF, 5, "(ESC) Escape %s => 0x%X", SHOW(intf), reason); const auto intf_cls = type_class(intf); if (intf_cls) { for (auto super_intf : *intf_cls->get_interfaces()) { escape_interface(super_intf, reason); } } } /** * Collect the interfaces to optimize for an optimization step. */ void SingleImplAnalysis::get_interfaces(TypeList& to_optimize) const { for (const auto& sit : single_impls) { auto& data = sit.second; redex_assert(!data.is_escaped()); if (data.children.empty()) { to_optimize.push_back(sit.first); } } // make the optimizable list stable. It's extremely useful for debugging // and also avoids to get different optimizations on different runs on the // same apk. // Sort by vtable size and name std::sort(to_optimize.begin(), to_optimize.end(), [](const DexType* type1, const DexType* type2) { auto size1 = type_class(type1)->get_vmethods().size(); auto size2 = type_class(type2)->get_vmethods().size(); return size1 == size2 ? strcmp(type1->get_name()->c_str(), type2->get_name()->c_str()) < 0 : size1 < size2; }); }