tools/redex-tool/DiffMethodSizes.cpp (296 lines of code) (raw):

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "DexClass.h"
#include "DexInstruction.h"
#include "DexUtil.h"
#include "JarLoader.h"
#include "ProguardConfiguration.h"
#include "ProguardParser.h"
#include "ReachableClasses.h"
#include "RedexContext.h"
#include "Show.h"
#include "Tool.h"
#include "Walkers.h"

#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace {

/**
 * Installs a freshly created RedexContext as `g_redex` for the lifetime of
 * this object and puts the previous value back on destruction.
 *
 * The restore happens in the destructor body, i.e. before the owned context
 * is destroyed, so `g_redex` never points at a destroyed context, including
 * when the scope is left through an exception.
 */
class ScopedRedexContext {
 public:
  ScopedRedexContext() : m_previous(g_redex), m_context(new RedexContext()) {
    g_redex = m_context.get();
  }
  ~ScopedRedexContext() { g_redex = m_previous; }

  ScopedRedexContext(const ScopedRedexContext&) = delete;
  ScopedRedexContext& operator=(const ScopedRedexContext&) = delete;

 private:
  RedexContext* m_previous;
  std::unique_ptr<RedexContext> m_context;
};

using JarMethodInfoMap = std::unordered_map<
    // Method as string
    std::string,
    // Fields of interest in "Code_attribute" from JVM class file.
    std::tuple<int /*max_stack*/, int /*max_locals*/, int /*code_length*/>>;

/**
 * Loads every jar in `jars` (paths are taken relative to `base_directory`)
 * and records, for each method that carries a "Code" attribute, the values
 * read from the start of that attribute.
 *
 * @return map from show(method) to <max_stack, max_locals, code_length>.
 */
JarMethodInfoMap load_jar_method_info(const std::string& base_directory,
                                      const std::vector<std::string>& jars) {
  JarMethodInfoMap info;
  auto hook = [&info](boost::variant<DexField*, DexMethod*> field_or_method,
                      const char* attribute_name,
                      uint8_t* attribute_pointer) {
    // 0: DexField, 1: DexMethod
    if (field_or_method.which() != 1 || strcmp(attribute_name, "Code") != 0) {
      return;
    }
    // The three reads are issued back to back on the same pointer;
    // presumably each read advances it past the consumed bytes (confirm
    // against JarLoaderUtil).
    uint16_t max_stack = JarLoaderUtil::read16(attribute_pointer);
    uint16_t max_locals = JarLoaderUtil::read16(attribute_pointer);
    uint32_t code_length = JarLoaderUtil::read32(attribute_pointer);
    DexMethod* method = boost::get<DexMethod*>(field_or_method);
    info.emplace(show(method),
                 std::make_tuple(max_stack, max_locals, code_length));
  };

  for (const auto& jar : jars) {
    load_jar_file(DexLocation::make_location("", base_directory + "/" + jar),
                  nullptr,
                  hook);
  }
  return info;
}

/** Renders a jar method info tuple as three space-separated integers. */
std::string format_jar_method_info(const std::tuple<int, int, int>& t) {
  return std::to_string(std::get<0>(t)) + " " + std::to_string(std::get<1>(t)) +
         " " + std::to_string(std::get<2>(t));
}

/**
 * Parses the ProGuard configuration at `command_line_path`, loads the jars
 * named by its injars and outjars entries, and prints:
 *   - "DIFF:" lines (out minus in) for methods whose info differs,
 *   - "IN:" lines for every injar method,
 *   - "OUT:" lines for every outjar method.
 * Methods present in the injars but missing from the outjars are reported on
 * stderr. If the file cannot be opened, an error is printed and nothing else
 * is done.
 */
void diff_in_out_jars_from_command_line(const std::string& command_line_path) {
  std::ifstream config(command_line_path);
  if (!config.is_open()) {
    std::cerr << "Unable to open \'" << command_line_path << '\'' << std::endl;
    return;
  }

  keep_rules::ProguardConfiguration pg_config;
  keep_rules::proguard_parser::parse(config, &pg_config, command_line_path);
  std::cout << "Number of -injar options: " << pg_config.injars.size()
            << std::endl;
  std::cout << "Number of -outjar options: " << pg_config.outjars.size()
            << std::endl;

  std::cout << "Reading injar files... " << std::flush;
  auto injar_info =
      load_jar_method_info(pg_config.basedirectory, pg_config.injars);
  std::cout << injar_info.size() << " method info loaded." << std::endl;

  // Create a new context for outjars and switch the context. The previous
  // context is put back when this guard goes out of scope.
  ScopedRedexContext outjar_context;

  std::cout << "Reading outjar files... " << std::flush;
  auto outjar_info =
      load_jar_method_info(pg_config.basedirectory, pg_config.outjars);
  std::cout << outjar_info.size() << " method info loaded." << std::endl;

  std::cout << "Diffing in and out jars... " << std::endl;
  JarMethodInfoMap diff;
  for (const auto& pair : injar_info) {
    auto found = outjar_info.find(pair.first);
    if (found == end(outjar_info)) {
      std::cerr << "Uh-oh, " << pair.first << " can't be found in outjars"
                << std::endl;
      continue;
    }

    const auto& in = pair.second;
    const auto& out = found->second;
    if (in == out) {
      continue;
    }

    diff.emplace(pair.first,
                 std::make_tuple(std::get<0>(out) - std::get<0>(in),
                                 std::get<1>(out) - std::get<1>(in),
                                 std::get<2>(out) - std::get<2>(in)));
  }

  // Bulk output: write plain newlines and flush once at the end instead of
  // flushing after every method.
  for (const auto& pair : diff) {
    std::cout << "DIFF: " << pair.first << " "
              << format_jar_method_info(pair.second) << '\n';
  }
  for (const auto& pair : injar_info) {
    std::cout << "IN: " << pair.first << " "
              << format_jar_method_info(pair.second) << '\n';
  }
  for (const auto& pair : outjar_info) {
    std::cout << "OUT: " << pair.first << " "
              << format_jar_method_info(pair.second) << '\n';
  }
  std::cout << std::flush;
}

using DexMethodInfoMap =
    std::unordered_map<std::string, // Method as string
                       std::tuple<int, int>>; // <code size, register size>
                                              // or <#move, moves size> if
                                              // it is storing move info.

/**
 * Loads the root dexen found in `dir`, walks every method of the resulting
 * class scope and stores `measure(method->get_dex_code())` under
 * show(method). `measure` receives a null pointer for methods without code.
 * Asserts that no two methods share the same string key.
 */
template <typename MeasureFn>
DexMethodInfoMap collect_dex_method_info(const std::string& dir,
                                         MeasureFn measure) {
  DexStore root_store("dex");
  DexStoresVector stores;
  // Load root dexen
  load_root_dexen(root_store, dir);
  stores.emplace_back(std::move(root_store));

  DexMethodInfoMap result;
  walk::methods(build_class_scope(stores),
                [&result, &measure](DexMethod* method) {
                  auto key = show(method);
                  always_assert(result.find(key) == end(result));
                  const auto* code = method->get_dex_code();
                  result.emplace(std::move(key), measure(code));
                });
  return result;
}

/**
 * @return map from show(method) to <code size, registers size> for all
 *         methods of the dexen in `dir`; <0, 0> for methods without code.
 */
DexMethodInfoMap load_dex_method_info(const std::string& dir) {
  return collect_dex_method_info(
      dir, [](const DexCode* code) -> std::tuple<int, int> {
        if (code == nullptr) {
          return std::make_tuple(0, 0);
        }
        return std::make_tuple(static_cast<int>(code->size()),
                               static_cast<int>(code->get_registers_size()));
      });
}

/**
 * @return map from show(method) to <number of move instructions, summed
 *         size() of those instructions> for all methods of the dexen in
 *         `dir`; <0, 0> for methods without code.
 */
DexMethodInfoMap load_dex_method_move_info(const std::string& dir) {
  return collect_dex_method_info(
      dir, [](const DexCode* code) -> std::tuple<int, int> {
        int num_moves = 0;
        int moves_size = 0;
        if (code) {
          for (const auto& insn : code->get_instructions()) {
            if (dex_opcode::is_move(insn->opcode())) {
              ++num_moves;
              moves_size += insn->size();
            }
          }
        }
        return std::make_tuple(num_moves, moves_size);
      });
}

/** Prints one "SIZE:" line per method of the dexen in `dexen_dir`. */
void dump_method_sizes_from_dexen_dir(const std::string& dexen_dir) {
  std::cout << "INFO: "
            << "Loading directory " << dexen_dir << " ... " << std::endl;
  auto info = load_dex_method_info(dexen_dir);
  std::cout << "INFO: " << info.size() << " method information loaded"
            << std::endl;
  for (const auto& pair : info) {
    std::cout << "SIZE: " << pair.first << " " << std::get<0>(pair.second)
              << " " << std::get<1>(pair.second) << '\n';
  }
  std::cout << std::flush;
}

/**
 * Loads method info from two dexen directories and prints a "DIFF:" line
 * (B minus A) for every method present in both whose info differs.
 *
 * @param is_comparing_dex_size true: compare <code size, registers size>;
 *        false: compare move info and additionally print move totals for
 *        methods missing from B, for the differing methods, and overall.
 */
void diff_from_two_dexen_dirs(const std::string& dexen_dir_A,
                              const std::string& dexen_dir_B,
                              bool is_comparing_dex_size) {
  std::cout << "INFO: "
            << "Loading directory " << dexen_dir_A << " ... " << std::endl;
  auto A_info = is_comparing_dex_size ? load_dex_method_info(dexen_dir_A)
                                      : load_dex_method_move_info(dexen_dir_A);
  std::cout << "INFO: " << A_info.size() << " method information loaded"
            << std::endl;

  std::cout << "INFO: "
            << "Loading directory " << dexen_dir_B << " ... " << std::endl;
  // B is loaded into its own context; the previous context is put back when
  // this guard goes out of scope.
  ScopedRedexContext B_context;
  auto B_info = is_comparing_dex_size ? load_dex_method_info(dexen_dir_B)
                                      : load_dex_method_move_info(dexen_dir_B);
  std::cout << "INFO: " << B_info.size() << " method information loaded"
            << std::endl;

  std::cout << "Diffing A and B... " << std::endl;
  DexMethodInfoMap diff;
  int total_disappear_method_moves = 0;
  int total_disappear_method_move_sizes = 0;
  for (const auto& pair : A_info) {
    auto found = B_info.find(pair.first);
    if (found == end(B_info)) {
      // Method exists only in A: account for its moves as removed.
      if (!is_comparing_dex_size) {
        total_disappear_method_moves += std::get<0>(pair.second);
        total_disappear_method_move_sizes += std::get<1>(pair.second);
      }
      continue;
    }

    const auto& A_sizes = pair.second;
    const auto& B_sizes = found->second;
    if (A_sizes == B_sizes) {
      continue;
    }

    diff.emplace(pair.first,
                 std::make_tuple(std::get<0>(B_sizes) - std::get<0>(A_sizes),
                                 std::get<1>(B_sizes) - std::get<1>(A_sizes)));
  }

  int total_num_moves = 0;
  int total_move_sizes = 0;
  for (const auto& pair : diff) {
    std::cout << "DIFF: " << pair.first << " " << std::get<0>(pair.second)
              << " " << std::get<1>(pair.second) << '\n';
    if (!is_comparing_dex_size) {
      total_num_moves += std::get<0>(pair.second);
      total_move_sizes += std::get<1>(pair.second);
    }
  }
  std::cout << std::flush;

  if (!is_comparing_dex_size) {
    std::cout << "DISAPPEARED METHODS: #moves: "
              << total_disappear_method_moves
              << ", move sizes: " << total_disappear_method_move_sizes
              << std::endl;
    std::cout << "EXISTED METHODS DIFF: #moves: " << total_num_moves
              << ", move sizes: " << total_move_sizes << std::endl;
    std::cout << "TOTAL DIFF: #moves: "
              << total_num_moves - total_disappear_method_moves
              << ", move sizes: "
              << total_move_sizes - total_disappear_method_move_sizes
              << std::endl;
  }
}

/** Prints the move count and move size of every method in `dex_dir`. */
void dump_method_move_info_from_dex_dir(const std::string& dex_dir) {
  std::cout << "INFO: "
            << "Loading directory " << dex_dir << " ... " << std::endl;
  auto info = load_dex_method_move_info(dex_dir);
  std::cout << "INFO: " << info.size() << " method information loaded"
            << std::endl;
  for (const auto& pair : info) {
    std::cout << pair.first << ": #moves = " << std::get<0>(pair.second)
              << ", size = " << std::get<1>(pair.second) << '\n';
  }
  std::cout << std::flush;
}

/**
 * The "diff-method-sizes" tool. Exactly one mode is run, checked in this
 * order: --commandline, --dexendir, --show-moves.
 */
class DiffMethodSizes : public Tool {
 public:
  DiffMethodSizes() : Tool("diff-method-sizes", "compare method sizes") {}

  void add_options(po::options_description& options) const override {
    options.add_options()(
        "commandline,c",
        po::value<std::string>(),
        "compare max_stack, max_locals, code_length of all methods in "
        "-injars and -outjars from command-line.txt")(
        "dexendir,d",
        po::value<std::vector<std::string>>()->multitoken(),
        "dump all method sizes in the given dexen directory; if two dexen "
        "directories are given, compare the method sizes")(
        "show-moves,s",
        po::value<std::vector<std::string>>()->multitoken(),
        "show number of "
        "move code and "
        "their size for "
        "each methods");
  }

  void run(const po::variables_map& options) override {
    if (!options["commandline"].empty()) {
      diff_in_out_jars_from_command_line(
          options["commandline"].as<std::string>());
    } else if (!options["dexendir"].empty()) {
      const auto& dexen_dirs =
          options["dexendir"].as<std::vector<std::string>>();
      switch (dexen_dirs.size()) {
      case 1:
        dump_method_sizes_from_dexen_dir(dexen_dirs[0]);
        break;
      case 2:
        diff_from_two_dexen_dirs(
            dexen_dirs[0], dexen_dirs[1], true /* is_comparing_dex_size */);
        break;
      default:
        std::cerr << "Only one or two --dexendir can be provided" << std::endl;
        break;
      }
    } else if (!options["show-moves"].empty()) {
      const auto& dex_dirs =
          options["show-moves"].as<std::vector<std::string>>();
      switch (dex_dirs.size()) {
      case 1:
        dump_method_move_info_from_dex_dir(dex_dirs[0]);
        break;
      case 2:
        diff_from_two_dexen_dirs(
            dex_dirs[0], dex_dirs[1], false /* is_comparing_dex_size */);
        break;
      default:
        // Name the option that was actually misused.
        std::cerr << "Only one or two --show-moves can be provided"
                  << std::endl;
        break;
      }
    } else {
      std::cerr << "No option or invalid option was given" << std::endl;
    }
  }
};

} // namespace

static DiffMethodSizes s_diff_method_sizes;