opt/shorten-srcstrings/Shorten.cpp (129 lines of code) (raw):

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "Shorten.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "Debug.h"
#include "DexClass.h"
#include "DexLoader.h"
#include "DexOutput.h"
#include "DexUtil.h"
#include "IRCode.h"
#include "PassManager.h"
#include "Show.h"
#include "Walkers.h"
#include "Warning.h"

// Names of the metrics reported to the PassManager by this pass.
constexpr const char* METRIC_SHORTENED_STRINGS = "num_shortened_strings";
constexpr const char* METRIC_BYTES_SAVED = "num_shortening_bytes_saved";

/**
 * Returns true when the `len`-byte string `str` ends in ".java".
 *
 * Strings shorter than the suffix itself are never considered file names.
 */
static bool maybe_file_name(const char* str, size_t len) {
  if (len < 5) return false;
  return strncmp(str + len - 5, ".java", 5) == 0;
}

/**
 * Returns true when `str` is non-empty and none of its first `len` bytes is a
 * newline, tab, colon or comma.
 *
 * ',' and '\n' are used as separators in the mapping file written by
 * strip_src_strings below, so a replacement containing them would make that
 * file ambiguous.
 * NOTE(review): ':' and '\t' are presumably excluded because of how source
 * file names get embedded in other textual output -- confirm.
 */
static bool is_reasonable_string(const char* str, size_t len) {
  if (len == 0) return false;
  for (size_t i = 0; i < len; i++) {
    // A switch keeps the check allocation-free; this runs once per candidate
    // string considered by get_suitable_string.
    switch (str[i]) {
    case '\n':
    case '\t':
    case ':':
    case ',':
      return false;
    default:
      break;
    }
  }
  return true;
}

/**
 * Pops candidates off the back of `dex_strings` until one is found that can
 * stand in for a source-file string.
 *
 * A candidate is accepted when it is not contained in `set`, does not look
 * like a ".java" file name, and passes is_reasonable_string().
 *
 * @param set          Strings that must not be handed out. This function only
 *                     reads it; the caller is responsible for recording the
 *                     returned string.
 * @param dex_strings  Candidate pool, consumed from the back. Every candidate
 *                     that is examined is removed, whether or not it is
 *                     accepted.
 * @return The accepted string, or nullptr once the pool is exhausted.
 */
const DexString* get_suitable_string(
    std::unordered_set<const DexString*>& set,
    std::vector<const DexString*>& dex_strings) {
  while (!dex_strings.empty()) {
    auto val = dex_strings.back();
    dex_strings.pop_back();
    auto valstr = val->c_str();
    auto vallen = strlen(valstr);
    auto not_file_name = !maybe_file_name(valstr, vallen);
    auto no_bad_char = is_reasonable_string(valstr, vallen);
    auto not_seen_yet = !set.count(val);
    if (not_seen_yet && not_file_name && no_bad_char) {
      return val;
    }
  }
  return nullptr;
}

/**
 * Replaces the source-file string of every class with another string that is
 * already gathered from the same group of classes, then writes the
 * original -> replacement mapping to `map_path`.
 *
 * @param stores    Stores whose classes are rewritten in place.
 * @param map_path  Path of the mapping file to (over)write. If it cannot be
 *                  opened the error is reported with perror() and the mapping
 *                  is skipped; the classes have already been rewritten by
 *                  then.
 * @param mgr       Receives the METRIC_SHORTENED_STRINGS and
 *                  METRIC_BYTES_SAVED counters.
 */
static void strip_src_strings(DexStoresVector& stores,
                              const char* map_path,
                              PassManager& mgr) {
  size_t shortened = 0;
  size_t string_savings = 0;
  // original source string -> every replacement chosen for it (one entry per
  // class group in which the original occurs).
  std::unordered_map<const DexString*, std::vector<const DexString*>>
      global_src_strings;
  std::unordered_set<const DexString*> shortened_used;

  for (auto& classes : DexStoreClassesIterator(stores)) {
    for (auto const& clazz : classes) {
      auto src_string = clazz->get_source_file();
      if (src_string) {
        // inserting actual source files into this set will cause them to not
        // get used --- as the whole point of this analysis is to substitute
        // source file strings
        shortened_used.insert(src_string);
      }
    }
  }

  for (auto& classes : DexStoreClassesIterator(stores)) {
    // The original -> replacement choice is made afresh for each class group.
    std::unordered_map<const DexString*, const DexString*> src_to_shortened;
    std::vector<const DexString*> current_dex_strings;
    for (auto const& clazz : classes) {
      clazz->gather_strings(current_dex_strings);
    }
    sort_unique(current_dex_strings, compare_dexstrings);
    // reverse current_dex_strings vector, so that we prefer strings that will
    // get smaller indices
    std::reverse(std::begin(current_dex_strings),
                 std::end(current_dex_strings));

    for (auto const& clazz : classes) {
      auto src_string = clazz->get_source_file();
      if (!src_string) {
        continue;
      }
      const DexString* shortened_src_string = nullptr;
      auto known = src_to_shortened.find(src_string);
      if (known == src_to_shortened.end()) {
        shortened_src_string =
            get_suitable_string(shortened_used, current_dex_strings);
        if (!shortened_src_string) {
          // No candidate left: keep the original string and warn.
          opt_warn(UNSHORTENED_SRC_STRING, "%s\n", SHOW(src_string));
          shortened_src_string = src_string;
        } else {
          shortened++;
          string_savings += strlen(src_string->c_str());
        }
        src_to_shortened[src_string] = shortened_src_string;
        // Mark the replacement as taken so it is not handed out again.
        shortened_used.emplace(shortened_src_string);
        global_src_strings[src_string].push_back(shortened_src_string);
      } else {
        shortened_src_string = known->second;
      }
      clazz->set_source_file(shortened_src_string);
    }
  }

  // Both counters are size_t, so %zu is the matching conversion.
  TRACE(SHORTEN, 1, "src strings shortened %zu, %zu bytes saved", shortened,
        string_savings);

  mgr.incr_metric(METRIC_SHORTENED_STRINGS, shortened);
  mgr.incr_metric(METRIC_BYTES_SAVED, string_savings);

  // generate mapping
  FILE* fd = fopen(map_path, "w");
  if (fd == nullptr) {
    perror("Error writing mapping file");
    return;
  }

  // One line per original string: "<original> -> <repl>, <repl>,\n".
  for (const auto& it : global_src_strings) {
    // Deliberate copy: sort_unique mutates its argument and the map is
    // iterated by const reference.
    auto desc_vector = it.second;
    sort_unique(desc_vector);
    fprintf(fd, "%s ->", it.first->c_str());
    for (auto str : desc_vector) {
      fprintf(fd, " %s,", str->c_str());
    }
    fprintf(fd, "\n");
  }
  fclose(fd);
}

/**
 * Pass entry point: resolves the mapping-file name through
 * ConfigFiles::metafile() and runs the source-string shortening over `stores`.
 *
 * Note that m_filename_mappings is overwritten with the value returned by
 * metafile().
 * NOTE(review): presumably metafile() turns the configured name into a path
 * inside the build's meta directory -- confirm against ConfigFiles.
 */
void ShortenSrcStringsPass::run_pass(DexStoresVector& stores,
                                     ConfigFiles& conf,
                                     PassManager& mgr) {
  m_filename_mappings = conf.metafile(m_filename_mappings);
  strip_src_strings(stores, m_filename_mappings.c_str(), mgr);
}

// File-local instance of the pass.
// NOTE(review): presumably its construction registers the pass -- confirm
// against the Pass base class.
static ShortenSrcStringsPass s_pass;