opt/instrument/Instrument.cpp

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include "Instrument.h" #include "BlockInstrument.h" #include "DexClass.h" #include "DexUtil.h" #include "IRList.h" #include "InitClassesWithSideEffects.h" #include "InterDexPass.h" #include "InterDexPassPlugin.h" #include "Match.h" #include "MethodReference.h" #include "PassManager.h" #include "RedexContext.h" #include "Show.h" #include "Shrinker.h" #include "ShrinkerConfig.h" #include "Timer.h" #include "TypeSystem.h" #include "Walkers.h" #include <boost/algorithm/string.hpp> #include <cmath> #include <fstream> #include <iostream> #include <string> #include <unordered_map> #include <unordered_set> #include <vector> using namespace instrument; /* * This pass performs instrumentation for dynamic (runtime) analysis. * * Analysis code, which should be a static public method, is written in Java. * Its class and method names are specified in the config. This pass then * inserts the method to points of interest. For a starting example, we * implement the "onMethodBegin" instrumentation. */ namespace { constexpr bool instr_debug = false; constexpr const char* SIMPLE_METHOD_TRACING = "simple_method_tracing"; constexpr const char* BASIC_BLOCK_TRACING = "basic_block_tracing"; constexpr const char* METHOD_REPLACEMENT = "methods_replacement"; class InstrumentInterDexPlugin : public interdex::InterDexPassPlugin { public: explicit InstrumentInterDexPlugin(size_t max_analysis_methods) : m_max_analysis_methods(max_analysis_methods) {} size_t reserve_frefs() override { // We may introduce a new field return 1; } size_t reserve_trefs() override { // We introduce a type reference to the analysis class in each dex return 1; } size_t reserve_mrefs() override { // In each dex, we will introduce more method refs from analysis methods. // This makes sure that the inter-dex pass keeps space for new method refs. return m_max_analysis_methods; } private: const size_t m_max_analysis_methods; }; // For example, say that "Lcom/facebook/debug/" is in the set. We match either // "^Lcom/facebook/debug/*" or "^Lcom/facebook/debug;". bool match_class_name(std::string cls_name, const std::unordered_set<std::string>& set) { always_assert(cls_name.back() == ';'); // We also support exact class name (e.g., "Lcom/facebook/Debug;") if (set.count(cls_name)) { return true; } cls_name.back() = '/'; size_t pos = cls_name.find('/', 0); while (pos != std::string::npos) { if (set.count(cls_name.substr(0, pos + 1))) { return true; } pos = cls_name.find('/', pos + 1); } return false; } void instrument_onMethodBegin(DexMethod* method, int index, DexMethod* method_onMethodBegin) { IRCode* code = method->get_code(); assert(code != nullptr); IRInstruction* const_inst = new IRInstruction(OPCODE_CONST); const_inst->set_literal(index); const auto reg_dest = code->allocate_temp(); const_inst->set_dest(reg_dest); IRInstruction* invoke_inst = new IRInstruction(OPCODE_INVOKE_STATIC); invoke_inst->set_method(method_onMethodBegin); invoke_inst->set_srcs_size(1); invoke_inst->set_src(0, reg_dest); // TODO(minjang): Consider using get_param_instructions. // Try to find a right insertion point: the entry point of the method. // We skip any fall throughs and IOPCODE_LOAD_PARRM*. auto insert_point = std::find_if_not( code->begin(), code->end(), [&](const MethodItemEntry& mie) { return mie.type == MFLOW_FALLTHROUGH || (mie.type == MFLOW_OPCODE && opcode::is_a_load_param(mie.insn->opcode())); }); if (insert_point == code->end()) { // No load params. So just insert before the head. insert_point = code->begin(); } else if (insert_point->type == MFLOW_DEBUG) { // Right after the load params, there could be DBG_SET_PROLOGUE_END. // Skip if there is a following POSITION, too. For example: // 1: OPCODE: IOPCODE_LOAD_PARAM_OBJECT v1 // 2: OPCODE: IOPCODE_LOAD_PARAM_OBJECT v2 // 3: DEBUG: DBG_SET_PROLOGUE_END // 4: POSITION: foo.java:42 (this might be optional.) // <== Instrumentation code will be inserted here. // std::advance(insert_point, std::next(insert_point)->type != MFLOW_POSITION ? 1 : 2); } else { // Otherwise, insert_point can be used directly. } code->insert_before(code->insert_before(insert_point, invoke_inst), const_inst); if (instr_debug) { for (auto it = code->begin(); it != code->end(); ++it) { if (it == insert_point) { TRACE(INSTRUMENT, 9, "<==== insertion"); TRACE(INSTRUMENT, 9, "%s", SHOW(*it)); ++it; if (it != code->end()) { TRACE(INSTRUMENT, 9, "%s", SHOW(*it)); ++it; if (it != code->end()) { TRACE(INSTRUMENT, 9, "%s", SHOW(*it)); } } TRACE(INSTRUMENT, 9, ""); break; } TRACE(INSTRUMENT, 9, "%s", SHOW(*it)); } } } void do_simple_method_tracing(DexClass* analysis_cls, DexStoresVector& stores, ConfigFiles& cfg, PassManager& pm, const InstrumentPass::Options& options) { const size_t NUM_SHARDS = options.num_shards; const auto& array_fields = InstrumentPass::patch_sharded_arrays(analysis_cls, NUM_SHARDS); always_assert(array_fields.size() == NUM_SHARDS); const auto& analysis_methods = InstrumentPass::generate_sharded_analysis_methods( analysis_cls, options.analysis_method_name, array_fields, NUM_SHARDS); const auto& analysis_method_map = analysis_methods.first; const auto& analysis_method_names = analysis_methods.second; // Write metadata file with more information. const auto& file_name = cfg.metafile(options.metadata_file_name); std::ofstream ofs(file_name, std::ofstream::out | std::ofstream::trunc); // Write meta info of the meta file: the type of the meta file and version. ofs << "#,simple-method-tracing,1.0" << std::endl; size_t method_id = 0; size_t excluded = 0; std::unordered_set<std::string> method_names; std::vector<DexMethod*> to_instrument; auto worker = [&](DexMethod* method, size_t& total_size) -> int { const auto& name = method->get_deobfuscated_name_or_empty(); always_assert_log( !name.empty(), "Deobfuscated method name can't be empty: obfuscated " "name: %s, class: \'%s\'(%s)", SHOW(method->get_name()), type_class(method->get_class())->get_deobfuscated_name().c_str(), SHOW(method->get_class()->get_name())); always_assert_log( !method_names.count(name), "Deobfuscated method names must be unique, but found duplicate: \'%s\'", SHOW(name)); method_names.insert(name); if (method->get_code() == nullptr) { ofs << "M,-1," << name << ",0,\"" << vshow(method->get_access(), true) << "\"\n"; return 0; } const size_t sum_opcode_sizes = method->get_code()->sum_opcode_sizes(); total_size += sum_opcode_sizes; // Excluding analysis methods myselves. if (analysis_method_names.count(method->get_name()->str()) || method == analysis_cls->get_clinit()) { ++excluded; TRACE(INSTRUMENT, 2, "Excluding analysis method: %s", SHOW(method)); ofs << "M,-1," << name << "," << sum_opcode_sizes << ",\"" << "MYSELF " << vshow(method->get_access(), true) << "\"\n"; return 0; } // Handle allowlist and blocklist. if (!options.allowlist.empty()) { if (InstrumentPass::is_included(method, options.allowlist)) { TRACE(INSTRUMENT, 8, "Allowlist: included: %s", SHOW(method)); } else { ++excluded; TRACE(INSTRUMENT, 9, "Allowlist: excluded: %s", SHOW(method)); return 0; } } // In case of a conflict, when an entry is present in both blocklist // and allowlist, the blocklist is given priority and the entry // is not instrumented. if (InstrumentPass::is_included(method, options.blocklist)) { ++excluded; TRACE(INSTRUMENT, 8, "Blocklist: excluded: %s", SHOW(method)); ofs << "M,-1," << name << "," << sum_opcode_sizes << ",\"" << "BLOCKLIST " << vshow(method->get_access(), true) << "\"\n"; return 0; } TRACE(INSTRUMENT, 8, "%zu: %s", method_id, SHOW(method)); assert(to_instrument.size() == method_id); to_instrument.push_back(method); // Emit metadata to the file. ofs << "M," << method_id << "," << name << "," << sum_opcode_sizes << ",\"" << vshow(method->get_access(), true /*is_method*/) << "\"\n"; ++method_id; return 1; }; auto scope = build_class_scope(stores); TypeSystem ts(scope); // We now have sharded method stats arrays. We interleave methods into // multiple arrays. Say we instrument 11 methods and have 3 arrays. Each array // may have up to floor(11/3) + 1 = 4 methods. Their distributions look like: // // 0 1 // method id 0 1 2 3 4 5 6 7 8 9 0 // array id 0 1 2 0 1 2 0 1 2 0 1 <= i % 3 // array index 0 0 0 1 1 1 2 2 2 3 3 <= i / 3 // // arrays[0] arrays[1] arrays[2] // method id [0, 3, 6, 9] [1, 4, 7, 10] [2, 5, 8] // // Be extremely careful when handling indexes. The Java-side uploader needs to // untangle the arrays. The WWW endpoints do not need to know this complexity. // So, only devices handle this sharding. // // In order to do that, we need to know the total number of methods to be // instrumented. We don't know this number until iterating all methods while // processing exclusions. We take a two-pass approach: // 1) For all methods, collect (method id, method) pairs and write meta data. // 2) Do actual instrumentation. for (const auto& cls : scope) { const auto& cls_name = cls->get_deobfuscated_name_or_empty(); always_assert_log( !method_names.count(cls_name), "Deobfuscated class names must be unique, but found duplicate: %s", SHOW(cls_name)); method_names.insert(cls_name); int instrumented = 0; size_t total_size = 0; for (auto dmethod : cls->get_dmethods()) { instrumented += worker(dmethod, total_size); } for (auto vmethod : cls->get_vmethods()) { instrumented += worker(vmethod, total_size); } ofs << "C," << cls_name << "," << total_size << "," << (instrumented == 0 ? "NONE" : std::to_string(instrumented)) << "," << cls->get_dmethods().size() << "," << cls->get_vmethods().size() << ",\"" << vshow(cls->get_access(), false /*is_method*/) << "\"\n"; // Enumerate all super and interface classes for this class. const auto& obj_type = DexType::get_type("Ljava/lang/Object;"); std::stringstream ss_parents; for (const auto& e : ts.parent_chain(cls->get_type())) { // Exclude myself and obvious java.lang.Object. if (e != obj_type && e != cls->get_type()) { ss_parents << show_deobfuscated(e) << " "; } } if (ss_parents.tellp() > 0) { ofs << "P," << cls_name << ",\"" << ss_parents.str() << "\"\n"; } std::stringstream ss_interfaces; for (const auto& e : ts.get_all_super_interfaces(cls->get_type())) { ss_interfaces << show_deobfuscated(e) << " "; } if (ss_interfaces.tellp() > 0) { ofs << "I," << cls_name << ",\"" << ss_interfaces.str() << "\"\n"; } } // Now we know the total number of methods to be instrumented. Do some // computations and actual instrumentation. const size_t kTotalSize = to_instrument.size(); TRACE(INSTRUMENT, 2, "%zu methods to be instrumented; shard size: %zu (+1)", kTotalSize, kTotalSize / NUM_SHARDS); for (size_t i = 0; i < kTotalSize; ++i) { TRACE(INSTRUMENT, 6, "Sharded %zu => [%zu][%zu] %s", i, (i % NUM_SHARDS), (i / NUM_SHARDS), SHOW(to_instrument[i])); instrument_onMethodBegin(to_instrument[i], (i / NUM_SHARDS) * options.num_stats_per_method, analysis_method_map.at((i % NUM_SHARDS) + 1)); } TRACE(INSTRUMENT, 1, "%zu methods were instrumented (%zu methods were excluded)", method_id, excluded); // Patch stat array sizes. for (size_t i = 0; i < NUM_SHARDS; ++i) { size_t n = kTotalSize / NUM_SHARDS + (i < kTotalSize % NUM_SHARDS ? 1 : 0); // Get obfuscated name corresponding to each sMethodStat[1-N] field. const auto field_name = array_fields.at(i + 1)->get_name()->str(); InstrumentPass::patch_array_size(analysis_cls, field_name, options.num_stats_per_method * n); } // Patch method count constant. always_assert(method_id == kTotalSize); auto field = analysis_cls->find_field_from_simple_deobfuscated_name( "sNumStaticallyInstrumented"); always_assert(field != nullptr); InstrumentPass::patch_static_field(analysis_cls, field->get_name()->str(), kTotalSize); field = analysis_cls->find_field_from_simple_deobfuscated_name("sProfileType"); always_assert(field != nullptr); InstrumentPass::patch_static_field( analysis_cls, field->get_name()->str(), static_cast<int>(ProfileTypeFlags::SimpleMethodTracing)); ofs.close(); TRACE(INSTRUMENT, 2, "Index file was written to: %s", SHOW(file_name)); pm.incr_metric("Instrumented", method_id); pm.incr_metric("Excluded", excluded); } std::unordered_set<std::string> load_blocklist_file( const std::string& file_name) { // Assume the file simply enumerates blocklisted names. std::unordered_set<std::string> ret; std::ifstream ifs(file_name); assert_log(ifs, "Can't open blocklist file: %s\n", SHOW(file_name)); std::string line; while (ifs >> line) { ret.insert(line); } TRACE(INSTRUMENT, 3, "Loaded %zu blocklist entries from %s", ret.size(), SHOW(file_name)); return ret; } void count_source_block_chain_length(DexStoresVector& stores, PassManager& pm) { std::atomic<size_t> longest_list{0}; std::atomic<size_t> sum{0}; std::atomic<size_t> count{0}; walk::parallel::methods(build_class_scope(stores), [&](DexMethod* m) { auto* code = m->get_code(); if (code == nullptr) { return; } boost::optional<size_t> last_known = boost::none; for (auto& mie : *code) { if (mie.type == MFLOW_SOURCE_BLOCK) { size_t len = 0; for (auto* sb = mie.src_block.get(); sb != nullptr; sb = sb->next.get()) { ++len; } count.fetch_add(1); sum.fetch_add(len); if (last_known && *last_known >= len) { continue; } for (;;) { auto cur = longest_list.load(); if (cur >= len) { last_known = cur; break; } if (longest_list.compare_exchange_strong(cur, len)) { last_known = len; break; } } } } }); pm.set_metric("longest_sb_chain", longest_list.load()); pm.set_metric("average100_sb_chain", count.load() > 0 ? 100 * sum.load() / count.load() : 0); } } // namespace constexpr const char* InstrumentPass::STATS_FIELD_NAME; // Find a sequence of opcode that creates a static array. Patch the array size. void InstrumentPass::patch_array_size(DexClass* analysis_cls, const std::string& array_name, const int array_size) { DexMethod* clinit = analysis_cls->get_clinit(); always_assert(clinit != nullptr); auto* code = clinit->get_code(); bool patched = false; walk::matching_opcodes_in_block( *clinit, // Don't find OPCODE_CONST. It might be deduped with others, or changing // this const can affect other instructions. (Well, we might have a // unique const number though.) So, just create a new const load // instruction. LocalDCE can clean up the redundant instructions. std::make_tuple(/* m::const_(), */ m::new_array_(), m::move_result_pseudo_object_(), m::sput_object_()), [&](DexMethod* method, cfg::Block*, const std::vector<IRInstruction*>& insts) { assert(method == clinit); if (insts[2]->get_field()->get_name()->str() != array_name) { return; } IRInstruction* const_inst = new IRInstruction(OPCODE_CONST); const_inst->set_literal(array_size); const auto reg_dest = code->allocate_temp(); const_inst->set_dest(reg_dest); insts[0]->set_src(0, reg_dest); for (auto& mie : InstructionIterable(code)) { if (mie.insn == insts[0]) { code->insert_before(code->iterator_to(mie), const_inst); patched = true; return; } } }); if (!patched) { std::cerr << "[InstrumentPass] error: cannot patch array size." << std::endl; std::cerr << show(clinit->get_code()) << std::endl; exit(1); } TRACE(INSTRUMENT, 2, "%s array was patched: %d", SHOW(array_name), array_size); } void InstrumentPass::patch_static_field(DexClass* analysis_cls, const std::string& field_name, const int new_number) { DexMethod* clinit = analysis_cls->get_clinit(); always_assert(clinit != nullptr); // Find the sput with the given field name. auto* code = clinit->get_code(); IRInstruction* sput_inst = nullptr; IRList::iterator insert_point; for (auto& mie : InstructionIterable(code)) { auto* insn = mie.insn; if (insn->opcode() == OPCODE_SPUT && insn->get_field()->get_name()->str() == field_name) { sput_inst = insn; insert_point = code->iterator_to(mie); break; } } // SPUT can be null if the original field value was encoded in the // static_values_off array. And consider simplifying using make_concrete. if (sput_inst == nullptr) { TRACE(INSTRUMENT, 2, "sput %s was deleted; creating it", SHOW(field_name)); sput_inst = new IRInstruction(OPCODE_SPUT); sput_inst->set_field( DexField::make_field(DexType::make_type(analysis_cls->get_name()), DexString::make_string(field_name), DexType::make_type("I"))); insert_point = code->insert_after(code->get_param_instructions().end(), sput_inst); } // Create a new const instruction just like patch_stat_array_size. IRInstruction* const_inst = new IRInstruction(OPCODE_CONST); const_inst->set_literal(new_number); const auto reg_dest = code->allocate_temp(); const_inst->set_dest(reg_dest); sput_inst->set_src(0, reg_dest); code->insert_before(insert_point, const_inst); TRACE(INSTRUMENT, 2, "%s was patched: %d", SHOW(field_name), new_number); } void InstrumentPass::bind_config() { bind("instrumentation_strategy", "", m_options.instrumentation_strategy); bind("analysis_class_name", "", m_options.analysis_class_name); bind("analysis_method_name", "", m_options.analysis_method_name); bind("blocklist", {}, m_options.blocklist); bind("allowlist", {}, m_options.allowlist); bind("blocklist_file_name", "", m_options.blocklist_file_name); bind("metadata_file_name", "redex-instrument-metadata.txt", m_options.metadata_file_name); bind("num_stats_per_method", 1, m_options.num_stats_per_method); bind("num_shards", 1, m_options.num_shards); // Note: only_cold_start_class is only used for block tracing. bind("only_cold_start_class", false, m_options.only_cold_start_class); bind("methods_replacement", {}, m_options.methods_replacement, "Replacing instance method call with static method call.", Configurable::bindflags::methods::error_if_unresolvable); bind("analysis_method_names", {}, m_options.analysis_method_names); // 0 means the block tracing is effectively method-only tracing. bind("max_num_blocks", 0, m_options.max_num_blocks); bind("instrument_catches", true, m_options.instrument_catches); bind("instrument_blocks_without_source_block", true, m_options.instrument_blocks_without_source_block); bind("instrument_only_root_store", false, m_options.instrument_only_root_store); size_t max_analysis_methods; if (m_options.instrumentation_strategy == SIMPLE_METHOD_TRACING) { max_analysis_methods = m_options.num_shards; } else if (m_options.instrumentation_strategy == BASIC_BLOCK_TRACING) { // Our current DynamicAnalysis has 7 onMethodExits and 1 onMethodBegin. max_analysis_methods = 8; } else { max_analysis_methods = 1; } after_configuration([this, max_analysis_methods] { // Make a small room for additional method refs during InterDex. interdex::InterDexRegistry* registry = static_cast<interdex::InterDexRegistry*>( PluginRegistry::get().pass_registry(interdex::INTERDEX_PASS_NAME)); registry->register_plugin( "INSTRUMENT_PASS_PLUGIN", [max_analysis_methods]() { return new InstrumentInterDexPlugin(max_analysis_methods); }); // Currently we only support instance call to static call. for (auto& pair : m_options.methods_replacement) { always_assert(!is_static(pair.first)); always_assert(is_static(pair.second)); } if (m_options.instrumentation_strategy == METHOD_REPLACEMENT) { always_assert_log( !m_options.methods_replacement.empty(), "Invalid configuration, `methods_replacement` should not be empty\n"); } }); } namespace { // Possible finalize some fields to help Redex clean up unused instrumentation. void maybe_unset_dynamic_analysis(DexStoresVector& stores, ConfigFiles& conf, const std::string& analysis_class_name) { auto analysis_type = DexType::get_type(analysis_class_name); if (analysis_type == nullptr) { return; } auto analysis_cls = type_class(analysis_type); if (analysis_cls == nullptr) { return; } // Undo all can_rename and can_delete on it. analysis_cls->rstate.unset_root(); for (auto* m : analysis_cls->get_all_methods()) { m->rstate.unset_root(); } for (auto* f : analysis_cls->get_all_fields()) { f->rstate.unset_root(); } // We don't care about running it's clinit analysis_cls->rstate.set_clinit_has_no_side_effects(); auto field = analysis_cls->find_field_from_simple_deobfuscated_name( "sNumStaticallyInstrumented"); if (field != nullptr) { // Make it final. The default value should be 0, and may lead to other // optimizations, e.g., by FinalInline. field->set_access(field->get_access() | DexAccessFlags::ACC_FINAL); } } } // namespace void InstrumentPass::eval_pass(DexStoresVector& stores, ConfigFiles& conf, PassManager& mgr) { if (!conf.get_json_config().get("instrument_pass_enabled", false) && !mgr.get_redex_options().instrument_pass_enabled) { maybe_unset_dynamic_analysis(stores, conf, m_options.analysis_class_name); return; } // Note: Could do the inverse and protect necessary members here. } // Check for inclusion in allow/block lists of methods/classes. It supports: // - "Lcom/fb/foo/" matches "^Lcom/fb/foo/*" or "^Lcom/facebook/debug;" // - "Lcom/fb/foo;.bar()V" matches exact full method names. // - "Lcom/fb/foo;.bar*" matches method name prefixes. bool InstrumentPass::is_included(const DexMethod* method, const std::unordered_set<std::string>& set) { if (set.empty()) { return false; } // Try to check for method by its full name. const auto& full_method_name = method->get_deobfuscated_name_or_empty(); if (set.count(full_method_name)) { return true; } // Prefix method name matching. for (const auto& pattern : set) { if (pattern.back() == '*') { if (full_method_name.find(pattern.substr(0, pattern.length() - 1)) != std::string::npos) { return true; } } } return match_class_name(show_deobfuscated(method->get_class()), set); } std::pair<std::unordered_map<int /*shard_num*/, DexMethod*>, std::unordered_set<std::string>> InstrumentPass::generate_sharded_analysis_methods( DexClass* cls, const std::string& template_method_full_name, const std::unordered_map<int /*shard_num*/, DexFieldRef*>& array_fields, const size_t num_shards) { DexMethod* template_method = cls->find_method_from_simple_deobfuscated_name(template_method_full_name); if (template_method == nullptr) { std::cerr << "[InstrumentPass] error: failed to find template method \'" << template_method_full_name << "\' in " << show(*cls) << std::endl; for (const auto& m : cls->get_dmethods()) { std::cerr << " " << show(m) << std::endl; } exit(1); } const std::string& template_method_name = template_method->get_name()->str(); std::unordered_map<int /*shard_num*/, DexMethod*> new_analysis_methods; std::unordered_set<std::string> method_names; // Even if one shard, we create a new method from the template method. for (size_t i = 1; i <= num_shards; ++i) { const auto new_name = template_method_name + std::to_string(i); std::string deobfuscated_name = template_method->get_deobfuscated_name_or_empty(); boost::replace_first(deobfuscated_name, template_method_name, new_name); DexMethod* new_method = DexMethod::make_method_from(template_method, template_method->get_class(), DexString::make_string(new_name)); new_method->set_deobfuscated_name(deobfuscated_name); cls->add_method(new_method); // Patch the array name in newly created method. bool patched = false; walk::matching_opcodes_in_block( *new_method, std::make_tuple(m::sget_object_()), [&](DexMethod* method, cfg::Block*, const std::vector<IRInstruction*>& insts) { DexField* field = static_cast<DexField*>(insts[0]->get_field()); if (field->get_simple_deobfuscated_name() == InstrumentPass::STATS_FIELD_NAME) { // Set the new field created from patch_sharded_arrays. insts[0]->set_field(array_fields.at(i)); patched = true; return; } }); always_assert_log(patched, "Failed to patch sMethodStats1 in %s\n", SHOW(new_method)); method_names.insert(new_name); new_analysis_methods[i] = new_method; TRACE(INSTRUMENT, 2, "Created %s with %s", SHOW(new_method), SHOW(array_fields.at(i))); } // Remove template method. cls->remove_method(template_method); return std::make_pair(new_analysis_methods, method_names); } std::unordered_map<int /*shard_num*/, DexFieldRef*> InstrumentPass::patch_sharded_arrays( DexClass* cls, const size_t num_shards, const std::map<int /*shard_num*/, std::string>& suggested_names) { // Insert additional sMethodStatsN into the clinit // // private static short[] sMethodStats1 = new short[0]; // private static short[] sMethodStats2 = new short[0]; <= Add // ... // private static short[] sMethodStatsN = new short[0]; <= Add // // OPCODE: CONST v0, 0 // OPCODE: NEW_ARRAY v0, [S // OPCODE: IOPCODE_MOVE_RESULT_PSEUDO_OBJECT v1 // OPCODE: SPUT_OBJECT v1, Lcom/foo/Bar;.sMethodStats1:[S // Add => OPCODE: NEW_ARRAY v0, [S // Add => OPCODE: IOPCODE_MOVE_RESULT_PSEUDO_OBJECT v1 // Add => OPCODE: SPUT_OBJECT v1, Lcom/foo/Bar;.sMethodStats2:[S always_assert(num_shards > 0); DexMethod* clinit = cls->get_clinit(); IRCode* code = clinit->get_code(); std::unordered_map<int /*shard_num*/, DexFieldRef*> fields; bool patched = false; walk::matching_opcodes_in_block( *clinit, std::make_tuple(m::new_array_(), m::move_result_pseudo_object_(), m::sput_object_()), [&](DexMethod* method, cfg::Block*, const std::vector<IRInstruction*>& insts) { DexField* template_field = static_cast<DexField*>(insts[2]->get_field()); if (template_field->get_simple_deobfuscated_name() != InstrumentPass::STATS_FIELD_NAME) { return; } // Create new sMethodStatsN fields. Even if num_shard is 1, we create // new field from the template field. Regarding obfuscation, the rename // module runs after InstrumentPass. So, we just need to assign // human-readable names here. for (size_t i = 1; i <= num_shards; i++) { const auto new_name = suggested_names.count(i) ? suggested_names.at(i) : InstrumentPass::STATS_FIELD_NAME + std::to_string(i); auto deobfuscated_name = template_field->get_deobfuscated_name(); boost::replace_first(deobfuscated_name, InstrumentPass::STATS_FIELD_NAME, new_name); DexField* new_field = static_cast<DexField*>( DexField::make_field(template_field->get_class(), DexString::make_string(new_name), template_field->get_type())); new_field->set_deobfuscated_name(deobfuscated_name); new_field->make_concrete( template_field->get_access(), template_field->get_static_value() == nullptr ? nullptr : template_field->get_static_value()->clone()); fields[i] = new_field; TRACE(INSTRUMENT, 2, "Created array: %s", SHOW(new_field)); cls->add_field(new_field); } // Clone the matched three instructions, but with new field names. for (size_t i = num_shards; i >= 1; --i) { auto new_insts = { (new IRInstruction(OPCODE_NEW_ARRAY)) ->set_type(insts[0]->get_type()) ->set_src(0, insts[0]->src(0)), (new IRInstruction(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT)) ->set_dest(insts[1]->dest()), (new IRInstruction(OPCODE_SPUT_OBJECT)) ->set_src(0, insts[2]->src(0)) ->set_field(fields.at(i))}; if (i == 1) { code->replace_opcode(insts[2], new_insts); } else { code->insert_after(insts[2], new_insts); } } patched = true; cls->remove_field(template_field); }); always_assert_log(patched, "Failed to insert sMethodStatsN:\n%s", SHOW(clinit->get_code())); // static short[][] sMethodStatsArray = new short[][] { // sMethodStats1, <== Add // sMethodStats2, <== Add // ... // } // // OPCODE: NEW_ARRAY v0, [[S <== Patch // OPCODE: IOPCODE_MOVE_RESULT_PSEUDO_OBJECT vX // OPCODE: SPUT_OBJECT vX, Lcom/foo;.sMethodStatsArray:[[S // Add => OPCODE: SGET_OBJECT Lcom/foo;.sMethodStats1:[S // Add => OPCODE: IOPCODE_MOVE_RESULT_PSEUDO_OBJECT vY // Add => OPCODE: CONST vN, index // Add => OPCODE: APUT_OBJECT vY, vX, vN // ... // Add => OPCODE: APUT_OBJECT vY, vX, vN auto field = cls->find_field_from_simple_deobfuscated_name("sMethodStatsArray"); always_assert(field != nullptr); InstrumentPass::patch_array_size(cls, field->get_name()->str(), num_shards); patched = false; walk::matching_opcodes_in_block( *clinit, std::make_tuple(m::new_array_(), m::move_result_pseudo_object_(), m::sput_object_()), [&](DexMethod* method, cfg::Block*, const std::vector<IRInstruction*>& insts) { DexField* field = static_cast<DexField*>(insts[2]->get_field()); if (field->get_simple_deobfuscated_name() != "sMethodStatsArray") { return; } const reg_t vX = insts[1]->dest(); const reg_t vY = code->allocate_temp(); const reg_t vN = code->allocate_temp(); for (size_t i = num_shards; i >= 1; --i) { code->insert_after( insts[2], {(new IRInstruction(OPCODE_SGET_OBJECT))->set_field(fields.at(i)), (new IRInstruction(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT)) ->set_dest(vY), (new IRInstruction(OPCODE_CONST)) ->set_literal(i - 1) ->set_dest(vN), (new IRInstruction(OPCODE_APUT_OBJECT)) ->set_srcs_size(3) ->set_src(0, vY) ->set_src(1, vX) ->set_src(2, vN)}); } patched = true; }); always_assert_log(patched, "Failed to insert sMethodStatsN to sMethodStatsArray:\n%s", SHOW(clinit->get_code())); return fields; } void InstrumentPass::run_pass(DexStoresVector& stores, ConfigFiles& cfg, PassManager& pm) { // TODO(fengliu): We may need change this but leave it here for local test. if (m_options.instrumentation_strategy == METHOD_REPLACEMENT) { bool exclude_primary_dex = pm.get_redex_options().is_art_build ? false : true; auto num_wrapped_invocations = method_reference::wrap_instance_call_with_static( stores, m_options.methods_replacement, exclude_primary_dex); pm.set_metric("wrapped_invocations", num_wrapped_invocations); return; } count_source_block_chain_length(stores, pm); if (!cfg.get_json_config().get("instrument_pass_enabled", false) && !pm.get_redex_options().instrument_pass_enabled) { TRACE(INSTRUMENT, 1, "--enable-instrument-pass (or \"instrument_pass_enabled\": true) is " "not specified."); pm.set_metric("skipped_pass", 1); return; } // Append block listed classes from the file, if exists. if (!m_options.blocklist_file_name.empty()) { for (const auto& e : load_blocklist_file(m_options.blocklist_file_name)) { m_options.blocklist.insert(e); } } pm.set_metric("blocklist_size", m_options.blocklist.size()); if (m_options.analysis_class_name.empty()) { std::cerr << "[InstrumentPass] error: empty analysis class name." << std::endl; exit(1); } // Get the analysis class. DexType* analysis_class_type = g_redex->get_type( DexString::get_string(m_options.analysis_class_name.c_str())); if (analysis_class_type == nullptr) { std::cerr << "[InstrumentPass] error: cannot find analysis class: " << m_options.analysis_class_name << std::endl; exit(1); } DexClass* analysis_cls = g_redex->type_class(analysis_class_type); always_assert(analysis_cls != nullptr); // Check whether the analysis class is in the primary dex. We use a heuristic // that looks the last 12 characters of the location of the given dex. auto dex_loc = analysis_cls->get_location()->get_file_name(); if (dex_loc.size() < 12 /* strlen("/classes.dex") == 12 */ || dex_loc.substr(dex_loc.size() - 12) != "/classes.dex") { std::cerr << "[InstrumentPass] Analysis class must be in the primary dex. " "It was in " << dex_loc << std::endl; exit(1); } // Just do the very minimal common work here: load the analysis class. // Each instrumentation strategy worker function will do its own job. TRACE(INSTRUMENT, 3, "Loaded analysis class: %s (%s)", SHOW(m_options.analysis_class_name), SHOW(analysis_cls->get_location()->get_file_name())); if (m_options.instrumentation_strategy == SIMPLE_METHOD_TRACING) { do_simple_method_tracing(analysis_cls, stores, cfg, pm, m_options); } else if (m_options.instrumentation_strategy == BASIC_BLOCK_TRACING) { BlockInstrumentHelper::do_basic_block_tracing(analysis_cls, stores, cfg, pm, m_options); } else { std::cerr << "[InstrumentPass] Unknown instrumentation strategy.\n"; exit(1); } Timer cleanup{"Cleanup"}; // We're done and have inserted our instrumentation. Allow further cleanup. g_redex->instrument_mode = false; // Be nice and immediately destruct some painful block overhead. auto scope = build_class_scope(stores); // Simple config. shrinker::ShrinkerConfig shrinker_config; shrinker_config.run_const_prop = true; shrinker_config.run_copy_prop = true; shrinker_config.run_local_dce = true; shrinker_config.compute_pure_methods = false; init_classes::InitClassesWithSideEffects init_classes_with_side_effects( scope, cfg.create_init_class_insns()); int min_sdk = pm.get_redex_options().min_sdk; shrinker::Shrinker shrinker(stores, scope, init_classes_with_side_effects, shrinker_config, min_sdk); walk::parallel::methods(scope, [&](auto* m) { if (m->get_code() == nullptr) { return; } shrinker.shrink_method(m); }); } static InstrumentPass s_pass;

opt/instrument/Instrument.cpp (747 lines of code) (raw):