libredex/PassManager.cpp (1,107 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "PassManager.h"
#include "DexAssessments.h"
#include <boost/filesystem.hpp>
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <list>
#include <thread>
#include <typeinfo>
#include <unordered_set>
#ifdef __linux__
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#endif
#include "AnalysisUsage.h"
#include "ApiLevelChecker.h"
#include "AssetManager.h"
#include "CFGMutation.h"
#include "CommandProfiling.h"
#include "ConfigFiles.h"
#include "Debug.h"
#include "DexClass.h"
#include "DexLoader.h"
#include "DexOutput.h"
#include "DexUtil.h"
#include "GraphVisualizer.h"
#include "IRCode.h"
#include "IRTypeChecker.h"
#include "InstructionLowering.h"
#include "JemallocUtil.h"
#include "MethodProfiles.h"
#include "Native.h"
#include "OptData.h"
#include "Pass.h"
#include "PrintSeeds.h"
#include "ProguardPrintConfiguration.h"
#include "ProguardReporting.h"
#include "ReachableClasses.h"
#include "Sanitizers.h"
#include "ScopedCFG.h"
#include "ScopedMetrics.h"
#include "Show.h"
#include "SourceBlocks.h"
#include "Timer.h"
#include "Walkers.h"
namespace {
// Meta-file name used when writing the hashes of the incoming classes.
constexpr const char* INCOMING_HASHES = "incoming_hashes.txt";
// Meta-file name used when writing the hashes of the outgoing classes.
constexpr const char* OUTGOING_HASHES = "outgoing_hashes.txt";
// Meta-file name for the sorted list of native functions found removable.
constexpr const char* REMOVABLE_NATIVES = "redex-removable-natives.txt";
// Metrics key under which each pass's position in the pass list is stored.
const std::string PASS_ORDER_KEY = "pass_order";
// Looks up the pass named by the PROFILE_PASS environment variable.
//
// The lookup happens eagerly so that typos / references to nonexistent passes
// are reported before any pass runs. Asserts that the variable is set and
// that it names a pass known to `mgr`; announces the selection on stderr.
const Pass* get_profiled_pass(const PassManager& mgr) {
  const char* requested_name = getenv("PROFILE_PASS");
  redex_assert(requested_name != nullptr);

  const Pass* profiled = mgr.find_pass(requested_name);
  always_assert(profiled != nullptr);

  std::cerr << "Will run profiler for " << profiled->name() << std::endl;
  return profiled;
}
// Returns the "apk_dir" entry of `config` with every double-quote character
// stripped out.
std::string get_apk_dir(const Json::Value& config) {
  auto dir = config["apk_dir"].asString();
  // Erase-remove idiom: compact the non-quote characters, then cut the tail.
  const auto new_end = std::remove(dir.begin(), dir.end(), '"');
  dir.erase(new_end, dir.end());
  return dir;
}
/**
 * Settings for, and driver of, the IRTypeChecker runs performed by the pass
 * manager. All settings are read from the "ir_type_checker" section of the
 * JSON config.
 */
class CheckerConfig {
 public:
  /**
   * Reads the checker settings from `conf`. Options that are absent from the
   * config fall back to the defaults spelled out below.
   */
  explicit CheckerConfig(const ConfigFiles& conf) {
    const Json::Value& type_checker_args =
        conf.get_json_config()["ir_type_checker"];
    m_run_type_checker_on_input =
        type_checker_args.get("run_on_input", true).asBool();
    m_run_type_checker_on_input_ignore_access =
        type_checker_args.get("run_on_input_ignore_access", false).asBool();
    m_run_type_checker_after_each_pass =
        type_checker_args.get("run_after_each_pass", true).asBool();
    m_verify_moves = type_checker_args.get("verify_moves", true).asBool();
    m_validate_invoke_super =
        type_checker_args.get("validate_invoke_super", true).asBool();
    m_check_no_overwrite_this =
        type_checker_args.get("check_no_overwrite_this", false).asBool();
    m_annotated_cfg_on_error =
        type_checker_args.get("annotated_cfg_on_error", false).asBool();
    m_annotated_cfg_on_error_reduced =
        type_checker_args.get("annotated_cfg_on_error_reduced", true).asBool();
    m_check_num_of_refs =
        type_checker_args.get("check_num_of_refs", false).asBool();
    for (auto& trigger_pass : type_checker_args["run_after_passes"]) {
      m_type_checker_trigger_passes.insert(trigger_pass.asString());
    }
  }

  /**
   * Type-checks the input scope (unless disabled by config).
   *
   * The first run validates accessibility. If that fails and accessibility
   * issues are not configured to be ignored, the process is terminated via
   * fail_error(). Otherwise the check is repeated without access validation:
   * if that passes, only a warning is printed; if it still fails, the process
   * is terminated.
   */
  void on_input(const Scope& scope) {
    if (!m_run_type_checker_on_input) {
      std::cerr << "Note: input type checking is turned off!" << std::endl;
      return;
    }
    auto res =
        check_no_overwrite_this(false).validate_access(true).run_verifier(
            scope, /* exit_on_fail= */ false);
    if (!res) {
      return; // No issues.
    }
    if (!m_run_type_checker_on_input_ignore_access) {
      std::string msg = *res;
      msg +=
          "\n If you are confident that this does not matter (e.g., because "
          "you are using MakePublicPass), turn off accessibility checking on "
          "input with `-J ir_type_checker.run_on_input_ignore_access=true`.\n "
          "You may turn off all input checking with `-J "
          "ir_type_checker.run_on_input=false`.";
      fail_error(msg);
    }

    // Accessibility problems are tolerated; see whether anything else is
    // wrong.
    res = check_no_overwrite_this(false).validate_access(false).run_verifier(
        scope, /* exit_on_fail= */ false);
    if (!res) {
      std::cerr << "Warning: input has accessibility issues. Continuing."
                << std::endl;
      return; // "No" issues.
    }
    std::string msg = *res;
    msg +=
        "\n If you are confident that this does not matter, turn off input "
        "checking with `-J ir_type_checker.run_on_input=false`.";
    fail_error(msg);
  }

  /**
   * Whether the type checker should run after `pass`: either it runs after
   * every pass, or `pass` is listed in "run_after_passes".
   */
  bool run_after_pass(const Pass* pass) const {
    return m_run_type_checker_after_each_pass ||
           m_type_checker_trigger_passes.count(pass->name()) > 0;
  }

  /**
   * Return activated_passes.size() if the checking is turned off.
   * Otherwise, return 0 or the index of the last InterDexPass.
   */
  size_t min_pass_idx_for_dex_ref_check(
      const std::vector<Pass*>& activated_passes) const {
    if (!m_check_num_of_refs) {
      return activated_passes.size();
    }
    size_t idx = 0;
    for (size_t i = 0; i < activated_passes.size(); i++) {
      if (activated_passes[i]->name() == "InterDexPass") {
        idx = i;
      }
    }
    return idx;
  }

  /**
   * Asserts that no dex in `stores` references more than 65536 distinct
   * methods, fields or types. `pass_name` is only used in the failure
   * message.
   */
  static void ref_validation(const DexStoresVector& stores,
                             const std::string& pass_name) {
    Timer t("ref_validation");
    // The lambda never outlives this call, so `pass_name` is captured by
    // reference instead of being copied.
    auto check_ref_num = [&pass_name](const DexClasses& classes,
                                      const DexStore& store, size_t dex_id) {
      constexpr size_t limit = 65536;
      std::unordered_set<DexMethodRef*> total_method_refs;
      std::unordered_set<DexFieldRef*> total_field_refs;
      std::unordered_set<DexType*> total_type_refs;
      for (const auto cls : classes) {
        std::vector<DexMethodRef*> method_refs;
        std::vector<DexFieldRef*> field_refs;
        std::vector<DexType*> type_refs;
        cls->gather_methods(method_refs);
        cls->gather_fields(field_refs);
        cls->gather_types(type_refs);
        total_type_refs.insert(type_refs.begin(), type_refs.end());
        total_field_refs.insert(field_refs.begin(), field_refs.end());
        total_method_refs.insert(method_refs.begin(), method_refs.end());
      }
      TRACE(PM, 1, "dex %s: method refs %zu, field refs %zu, type refs %zu",
            dex_name(store, dex_id).c_str(), total_method_refs.size(),
            total_field_refs.size(), total_type_refs.size());
      always_assert_log(total_method_refs.size() <= limit,
                        "%s adds too many method refs", pass_name.c_str());
      always_assert_log(total_field_refs.size() <= limit,
                        "%s adds too many field refs", pass_name.c_str());
      always_assert_log(total_type_refs.size() <= limit,
                        "%s adds too many type refs", pass_name.c_str());
    };
    for (const auto& store : stores) {
      size_t dex_id = 0;
      for (const auto& classes : store.get_dexen()) {
        check_ref_num(classes, store, dex_id++);
      }
    }
  }

  // Literate style: each setter returns a modified copy, so calls can be
  // chained without touching the original configuration.
  CheckerConfig check_no_overwrite_this(bool val) const {
    CheckerConfig ret = *this;
    ret.m_check_no_overwrite_this = val;
    return ret;
  }

  CheckerConfig validate_access(bool val) const {
    CheckerConfig ret = *this;
    ret.m_validate_access = val;
    return ret;
  }

  /**
   * Runs the IRTypeChecker over every method of `scope`.
   *
   * Returns boost::none when no method fails. Otherwise a report is built for
   * the failing method with the fewest opcodes (plus a count of the remaining
   * failures). With `exit_on_fail` set the report is raised through
   * always_assert_log; otherwise it is returned to the caller.
   */
  boost::optional<std::string> run_verifier(const Scope& scope,
                                            bool exit_on_fail = true) {
    TRACE(PM, 1, "Running IRTypeChecker...");
    Timer t("IRTypeChecker");

    // Reduction value for the parallel walk: number of failing methods and
    // the smallest one among them.
    struct Result {
      size_t errors{0};
      DexMethod* smallest_error_method{nullptr};
      size_t smallest_size{std::numeric_limits<size_t>::max()};

      Result() = default;
      explicit Result(DexMethod* m)
          : errors(1),
            smallest_error_method(m),
            smallest_size(m->get_code()->count_opcodes()) {}

      Result& operator+=(const Result& other) {
        errors += other.errors;
        if (smallest_size > other.smallest_size) {
          smallest_size = other.smallest_size;
          smallest_error_method = other.smallest_error_method;
        }
        return *this;
      }
    };

    // Builds a checker configured from this object and hands it to `fn`.
    auto run_checker_tmpl = [&](DexMethod* dex_method, auto fn) {
      IRTypeChecker checker(dex_method, m_validate_access,
                            m_validate_invoke_super);
      if (m_verify_moves) {
        checker.verify_moves();
      }
      if (m_check_no_overwrite_this) {
        checker.check_no_overwrite_this();
      }
      return fn(std::move(checker));
    };
    // Runs the checker and returns it so the caller can inspect the outcome.
    auto run_checker = [&](DexMethod* dex_method) {
      return run_checker_tmpl(dex_method, [](auto checker) {
        checker.run();
        return checker;
      });
    };
    // Produces the code listing for the error report: an annotated CFG dump
    // if configured, the plain code otherwise.
    auto run_checker_error = [&](DexMethod* dex_method) {
      if (m_annotated_cfg_on_error) {
        return run_checker_tmpl(dex_method, [&](auto checker) {
          if (m_annotated_cfg_on_error_reduced) {
            return checker.dump_annotated_cfg_reduced(dex_method);
          } else {
            return checker.dump_annotated_cfg(dex_method);
          }
        });
      }
      return show(dex_method->get_code());
    };

    auto res =
        walk::parallel::methods<Result>(scope, [&](DexMethod* dex_method) {
          auto checker = run_checker(dex_method);
          if (!checker.fail()) {
            return Result();
          }
          return Result(dex_method);
        });
    if (res.errors == 0) {
      return boost::none;
    }

    // Re-run the smallest method to produce error message.
    auto checker = run_checker(res.smallest_error_method);
    redex_assert(checker.fail());
    std::ostringstream oss;
    oss << "Inconsistency found in Dex code for "
        << show(res.smallest_error_method) << std::endl
        << " " << checker.what() << std::endl
        << "Code:" << std::endl
        << run_checker_error(res.smallest_error_method);
    if (res.errors > 1) {
      oss << "\n(" << (res.errors - 1) << " more issues!)";
    }
    always_assert_log(!exit_on_fail, "%s", oss.str().c_str());
    return oss.str();
  }

  /**
   * Prints `error_msg` (and, if `errors` > 1, the number of further issues)
   * to stderr and terminates the process immediately via _exit().
   */
  static void fail_error(const std::string& error_msg, size_t errors = 1) {
    std::cerr << error_msg << std::endl;
    if (errors > 1) {
      std::cerr << "(" << (errors - 1) << " more issues!)" << std::endl;
    }
    _exit(EXIT_FAILURE);
  }

 private:
  // Names of passes after which the checker runs even when it does not run
  // after every pass.
  std::unordered_set<std::string> m_type_checker_trigger_passes;
  bool m_run_type_checker_on_input;
  bool m_run_type_checker_after_each_pass;
  bool m_run_type_checker_on_input_ignore_access;
  bool m_verify_moves;
  bool m_validate_invoke_super;
  bool m_check_no_overwrite_this;
  bool m_check_num_of_refs;
  // TODO(fengliu): Kill the `validate_access` flag.
  bool m_validate_access{true};
  bool m_annotated_cfg_on_error{false};
  bool m_annotated_cfg_on_error_reduced{true};
};
/**
 * Records the VmHWM memory statistic at construction so that trace_log() can
 * later report the value reached and the growth since construction.
 * When not enabled, the object does nothing.
 */
class ScopedVmHWM {
 public:
  /**
   * @param enabled whether any measurement takes place.
   * @param reset   when enabled, try to reset the high-water mark before
   *                taking the starting sample.
   */
  explicit ScopedVmHWM(bool enabled, bool reset) : m_enabled(enabled) {
    if (!enabled) {
      return;
    }
    if (reset) {
      try_reset_hwm_mem_stat();
    }
    m_before = get_mem_stats().vm_hwm;
  }

  /**
   * Samples VmHWM again, stores "vm_hwm_after" / "vm_hwm_delta" as metrics on
   * `mgr` (if non-null) and traces the numbers for `pass`.
   */
  void trace_log(PassManager* mgr, const Pass* pass) {
    if (!m_enabled) {
      return;
    }
    const uint64_t after = get_mem_stats().vm_hwm;
    if (mgr != nullptr) {
      mgr->set_metric("vm_hwm_after", after);
      mgr->set_metric("vm_hwm_delta", after - m_before);
    }
    TRACE(STATS, 1, "VmHWM for %s was %s (%s over start).",
          pass->name().c_str(), pretty_bytes(after).c_str(),
          pretty_bytes(after - m_before).c_str());
  }

 private:
  // Starting sample. Initialized so that a disabled instance never carries an
  // indeterminate value.
  uint64_t m_before{0};
  bool m_enabled;
};
/**
 * Optional sanity check that no two methods, and no two fields, in the scope
 * share a deobfuscated name. Configured through the
 * "check_unique_deobfuscated_names" section of the JSON config; every check
 * is off unless enabled there. A violation prints a diagnostic to stderr and
 * exits the process.
 */
class CheckUniqueDeobfuscatedNames {
 public:
  bool m_after_each_pass{false};

  explicit CheckUniqueDeobfuscatedNames(ConfigFiles& conf) {
    const Json::Value& args =
        conf.get_json_config()["check_unique_deobfuscated_names"];
    m_after_each_pass = args.get("run_after_each_pass", false).asBool();
    m_initially = args.get("run_initially", false).asBool();
    m_finally = args.get("run_finally", false).asBool();
  }

  // Runs the check before any pass, if "run_initially" is set.
  void run_initially(const Scope& scope) {
    if (m_initially) {
      check_unique_deobfuscated_names("<initial>", scope);
    }
  }

  // Runs the check after all passes, if "run_finally" is set.
  void run_finally(const Scope& scope) {
    if (m_finally) {
      check_unique_deobfuscated_names("<final>", scope);
    }
  }

  // Runs the check after `pass`, if "run_after_each_pass" is set.
  void run_after_pass(const Pass* pass, const Scope& scope) {
    if (m_after_each_pass) {
      check_unique_deobfuscated_names(pass->name().c_str(), scope);
    }
  }

 private:
  /**
   * Walks all methods, then all fields, of `scope` and aborts on the first
   * deobfuscated name seen twice. `pass_name` only labels the diagnostic.
   */
  void check_unique_deobfuscated_names(const char* pass_name,
                                       const Scope& scope) {
    TRACE(PM, 1, "Running check_unique_deobfuscated_names...");
    Timer t("check_unique_deobfuscated_names");

    std::unordered_map<const DexString*, DexMethod*> method_names;
    walk::methods(scope, [&method_names, pass_name](DexMethod* dex_method) {
      auto deob = dex_method->get_deobfuscated_name_or_null();
      auto it = method_names.find(deob);
      if (it != method_names.end()) {
        // NOTE(review): the getter's name suggests the key may be null (two
        // methods without a deobfuscated name would collide here); guard the
        // dereference so the diagnostic itself cannot crash.
        const char* dup_name =
            it->first != nullptr ? it->first->c_str() : "(null)";
        fprintf(
            stderr,
            "ABORT! [%s] Duplicate deobfuscated method name: %s\nfor %s\n vs "
            "%s\n",
            pass_name, dup_name, SHOW(dex_method), SHOW(it->second));
        exit(EXIT_FAILURE);
      }
      method_names.emplace(deob, dex_method);
    });

    std::unordered_map<std::string, DexField*> field_names;
    walk::fields(scope, [&field_names, pass_name](DexField* dex_field) {
      auto deob = dex_field->get_deobfuscated_name();
      auto it = field_names.find(deob);
      if (it != field_names.end()) {
        fprintf(stderr,
                "ABORT! [%s] Duplicate deobfuscated field name: %s\nfor %s\n "
                "vs %s\n",
                pass_name, it->first.c_str(), SHOW(dex_field),
                SHOW(it->second));
        exit(EXIT_FAILURE);
      }
      field_names.emplace(deob, dex_field);
    });
  }

  bool m_initially{false};
  bool m_finally{false};
};
/**
 * Thin wrapper around visualizer::Classes: registers the classes requested
 * through the "dump_cfg_classes" config entry, records a snapshot per pass,
 * and writes everything out at the end.
 */
class VisualizerHelper {
 private:
  static constexpr visualizer::Options VISUALIZER_PASS_OPTIONS =
      static_cast<visualizer::Options>(visualizer::Options::SKIP_NO_CHANGE |
                                       visualizer::Options::FORCE_CFG);
  static constexpr const char* CFG_DUMP_BASE_NAME = "redex-cfg-dumps.cfg";

  visualizer::Classes m_class_cfgs;

 public:
  // Output goes to the CFG_DUMP_BASE_NAME meta file; "write_cfg_each_pass"
  // is forwarded to the visualizer as its second constructor argument.
  explicit VisualizerHelper(const ConfigFiles& conf)
      : m_class_cfgs(conf.metafile(CFG_DUMP_BASE_NAME),
                     conf.get_json_config().get("write_cfg_each_pass", false)) {
    const auto& json = conf.get_json_config();
    m_class_cfgs.add_all(json.get("dump_cfg_classes", std::string("")));
  }

  // Records a snapshot labelled "<pass name>(<i>)". The label is produced
  // through a callback handed to the visualizer.
  void add_pass(const Pass* pass, size_t i) {
    auto make_label = [&pass, &i]() {
      return pass->name() + "(" + std::to_string(i) + ")";
    };
    m_class_cfgs.add_pass(make_label, VISUALIZER_PASS_OPTIONS);
  }

  // Records the final snapshot and writes the dump.
  void finalize() {
    m_class_cfgs.add_pass("After all passes");
    m_class_cfgs.write();
  }
};
/**
 * Bookkeeping around a single pass execution for the preserved-analyses map:
 * pre_pass() lets the pass declare its analysis usage, post_pass() applies
 * the resulting invalidation and registers analysis passes as preserved.
 */
class AnalysisUsageHelper {
 public:
  using PreservedMap = std::unordered_map<AnalysisID, Pass*>;

  // `m` is held by reference and updated in post_pass().
  explicit AnalysisUsageHelper(PreservedMap& m)
      : m_preserved_analysis_passes(m) {}

  // Lets `pass` fill in this helper's AnalysisUsage.
  void pre_pass(Pass* pass) {
    pass->set_analysis_usage(m_analysis_usage);
  }

  void post_pass(Pass* pass) {
    // Drop previously preserved analyses according to the policy the pass
    // declared.
    m_analysis_usage.do_pass_invalidation(&m_preserved_analysis_passes);

    if (!pass->is_analysis_pass()) {
      return;
    }
    // An analysis pass that just ran is itself preserved from now on.
    m_preserved_analysis_passes.emplace(get_analysis_id_by_pass(pass), pass);
  }

 private:
  AnalysisUsage m_analysis_usage;
  PreservedMap& m_preserved_analysis_passes;
};
/**
 * Tracks which native functions remain referenced by Java `native` methods.
 *
 * On construction (when a JNI output directory is given) the global native
 * context is built and every native function bound to a native method is
 * recorded as removable. post_passes() then un-marks the functions whose
 * methods are still in the scope, writes the remaining names to the
 * REMOVABLE_NATIVES meta file and releases the native context.
 */
class JNINativeContextHelper {
 public:
  explicit JNINativeContextHelper(const Scope& scope,
                                  const std::string& jni_output_dir) {
    if (jni_output_dir.empty()) {
      return;
    }
    // Currently, if the path is not found, the native context is going to be
    // empty.
    g_native_context = std::make_unique<native::NativeContext>(
        native::NativeContext::build(jni_output_dir, scope));

    // Before running any passes, treat everything as removable.
    walk::methods(scope, [this](DexMethod* m) {
      if (!is_native(m)) {
        return;
      }
      auto native_func = native::get_native_function_for_dex_method(m);
      if (native_func) {
        TRACE(NATIVE, 2, "Found native function %s",
              native_func->get_name().c_str());
        m_removable_natives.emplace(native_func);
      } else {
        // There's a native method which we don't find. Let's be
        // conservative and ask Redex not to remove it.
        m->rstate.set_root();
        // Ignore "linking" failures for pre-existing "linking" failures.
        m_java_method_no_impl_on_input.emplace(m);
      }
    });
  }

  /**
   * To be called after all passes ran. Does nothing if no native context was
   * built. Otherwise writes the sorted names of the still-removable native
   * functions, one per line, to the REMOVABLE_NATIVES meta file.
   */
  void post_passes(const Scope& scope, ConfigFiles& conf) {
    if (!g_native_context) {
      return;
    }
    // After running all passes, walk the surviving native methods and take
    // the functions they are bound to out of the removable set.
    walk::methods(scope, [this](DexMethod* m) {
      if (!is_native(m)) {
        return;
      }
      auto native_func = native::get_native_function_for_dex_method(m);
      if (native_func) {
        auto it = m_removable_natives.find(native_func);
        if (it != m_removable_natives.end()) {
          TRACE(NATIVE, 2, "Cannot remove native function %s, called as %s",
                native_func->get_name().c_str(), SHOW(m));
          m_removable_natives.erase(it);
        }
      } else if (!m_java_method_no_impl_on_input.count(m)) {
        // TODO: "Linking" error: Change this to an assertion failure
        TRACE(PM, 1, "Unable to find native implementation for %s.", SHOW(m));
      }
    });

    // size() is a size_t, so %zu is the matching conversion.
    TRACE(NATIVE, 2, "Total removable natives: %zu",
          m_removable_natives.size());

    auto removable_natives_file_name = conf.metafile(REMOVABLE_NATIVES);
    std::vector<std::string> output_symbols;
    output_symbols.reserve(m_removable_natives.size());
    // Might be non-deterministic in order, put them in a vector and sort.
    for (auto func : m_removable_natives) {
      output_symbols.push_back(func->get_name());
    }
    std::sort(output_symbols.begin(), output_symbols.end());

    std::ofstream out(removable_natives_file_name);
    // TODO: For better human readability, change this to CSV of native,java?
    for (const auto& name : output_symbols) {
      // '\n' instead of std::endl: no need to flush after every symbol, the
      // stream is flushed when it is closed.
      out << name << '\n';
    }
    g_native_context.reset();
  }

 private:
  // Native functions not (or no longer) known to be bound to a native method.
  std::unordered_set<native::Function*> m_removable_natives;
  // Native methods that already lacked an implementation on input.
  std::unordered_set<DexMethod*> m_java_method_no_impl_on_input;
};
// Retries matching of method-profile lines that could not be resolved so far
// (a pass may have introduced new methods, and later passes benefit from the
// additional matches), then publishes the resolved / unresolved counts as
// metrics.
void process_method_profiles(PassManager& mgr, ConfigFiles& conf) {
  conf.process_unresolved_method_profile_lines();

  auto&& profiles = conf.get_method_profiles();
  mgr.set_metric("~result~MethodProfiles~", profiles.size());
  mgr.set_metric("~result~MethodProfiles~unresolved~",
                 profiles.unresolved_size());
}
// Writes the class hashes of `scope` to the INCOMING_HASHES meta file when
// the configuration asks for incoming hashes; otherwise does nothing.
void maybe_write_hashes_incoming(const ConfigFiles& conf, const Scope& scope) {
  if (!conf.emit_incoming_hashes()) {
    return;
  }
  TRACE(PM, 1, "Writing incoming hashes...");
  Timer t("Writing incoming hashes");
  std::ofstream out(conf.metafile(INCOMING_HASHES));
  hashing::print_classes(out, scope);
}
// Writes the class hashes of `scope` to the OUTGOING_HASHES meta file when
// the configuration asks for outgoing hashes; otherwise does nothing.
void maybe_write_hashes_outgoing(const ConfigFiles& conf, const Scope& scope) {
  if (!conf.emit_outgoing_hashes()) {
    return;
  }
  TRACE(PM, 1, "Writing outgoing hashes...");
  Timer t("Writing outgoing hashes");
  std::ofstream out(conf.metafile(OUTGOING_HASHES));
  hashing::print_classes(out, scope);
}
// If the REDEX_SEEDS_FILE environment variable is set, prints the seeds of
// `scope` to the file it names (both boolean options of print_seeds off).
void maybe_write_env_seeds_file(const ConfigFiles& conf, const Scope& scope) {
  const char* env_path = std::getenv("REDEX_SEEDS_FILE");
  if (env_path == nullptr) {
    return;
  }
  const std::string seed_filename(env_path);
  Timer t("Writing seeds file " + seed_filename);
  std::ofstream seeds_file(seed_filename);
  keep_rules::print_seeds(seeds_file, conf.get_proguard_map(), scope,
                          /* allowshrinking_filter= */ false,
                          /* allowobfuscation_filter= */ false);
}
// When a print-seeds path is configured, writes five files derived from it:
//   <path>                    seeds of the scope
//   <path>.pro                the ProGuard configuration (asserted non-null)
//   <path>.incoming           the incoming classes
//   <path>.allowshrinking     seeds, fourth print_seeds argument set
//   <path>.allowobfuscation   seeds, fifth print_seeds argument set
// Does nothing when the path is empty.
void maybe_print_seeds_incoming(
    const ConfigFiles& conf,
    const Scope& scope,
    const std::unique_ptr<keep_rules::ProguardConfiguration>& pg_config) {
  if (conf.get_printseeds().empty()) {
    return;
  }
  const auto& seeds_path = conf.get_printseeds();
  Timer t("Writing seeds to file " + seeds_path);

  std::ofstream seeds_file(seeds_path);
  keep_rules::print_seeds(seeds_file, conf.get_proguard_map(), scope);

  std::ofstream config_file(seeds_path + ".pro");
  redex_assert(pg_config != nullptr);
  keep_rules::show_configuration(config_file, scope, *pg_config);

  std::ofstream incoming(seeds_path + ".incoming");
  redex::print_classes(incoming, conf.get_proguard_map(), scope);

  std::ofstream shrinking_file(seeds_path + ".allowshrinking");
  keep_rules::print_seeds(shrinking_file, conf.get_proguard_map(), scope,
                          true, false);

  std::ofstream obfuscation_file(seeds_path + ".allowobfuscation");
  keep_rules::print_seeds(obfuscation_file, conf.get_proguard_map(), scope,
                          false, true);
}
// When a print-seeds path is configured, writes the classes reachable
// through `it` to "<path>.outgoing". Does nothing when the path is empty.
void maybe_print_seeds_outgoing(const ConfigFiles& conf,
                                const DexStoreClassesIterator& it) {
  if (conf.get_printseeds().empty()) {
    return;
  }
  const auto outgoing_path = conf.get_printseeds() + ".outgoing";
  Timer t("Writing outgoing classes to file " + outgoing_path);
  // The scope is rebuilt from the iterator rather than passed in.
  auto scope = build_class_scope(it);
  std::ofstream outgoing(outgoing_path);
  redex::print_classes(outgoing, conf.get_proguard_map(), scope);
}
// Turns on optimization-decision logging when "opt_decisions.enable_logs" is
// set in the JSON config (default: off).
void maybe_enable_opt_data(const ConfigFiles& conf) {
  const Json::Value& opt_decisions_args =
      conf.get_json_config()["opt_decisions"];
  const bool logs_requested =
      opt_decisions_args.get("enable_logs", false).asBool();
  if (!logs_requested) {
    return;
  }
  opt_metadata::OptDataMapper::get_instance().enable_logs();
}
// Whether the dex hasher runs after each pass: never when the hasher is
// disabled through the options, otherwise as configured by
// "hasher.run_after_each_pass" (default: false).
bool is_run_hasher_after_each_pass(const ConfigFiles& conf,
                                   const RedexOptions& options) {
  if (!options.disable_dex_hasher) {
    const Json::Value& hasher_args = conf.get_json_config()["hasher"];
    return hasher_args.get("run_after_each_pass", false).asBool();
  }
  return false;
}
// Builds the assessor settings from the "assessor" section of the JSON
// config. Every flag defaults to false. The options argument is unused.
AssessorConfig get_assessor_config(const ConfigFiles& conf,
                                   const RedexOptions&) {
  const Json::Value& assessor_args = conf.get_json_config()["assessor"];
  const auto flag = [&assessor_args](const char* key) {
    return assessor_args.get(key, false).asBool();
  };

  AssessorConfig res;
  res.run_after_each_pass = flag("run_after_each_pass");
  res.run_initially = flag("run_initially");
  res.run_finally = flag("run_finally");
  res.run_sb_consistency = flag("run_sb_consistency");
  return res;
}
class AfterPassSizes {
private:
PassManager* m_mgr;
// Would be nice to do things multi-threaded, but then we cannot
// fork and can have only one job in flight. Instead store pids
// and use non-blocking waits.
struct Job {
PassManager::PassInfo* pass_info;
std::string tmp_dir;
pid_t pid;
Job(PassManager::PassInfo* pass_info, const std::string& tmp_dir, pid_t pid)
: pass_info(pass_info), tmp_dir(tmp_dir), pid(pid) {}
};
std::list<Job> m_open_jobs;
bool m_enabled{false};
bool m_run_interdex{true};
bool m_debug{false};
size_t m_max_jobs{4};
public:
AfterPassSizes(PassManager* mgr, const ConfigFiles& conf) : m_mgr(mgr) {
const auto& json = conf.get_json_config();
m_enabled = json.get("after_pass_size", m_enabled);
m_run_interdex = json.get("after_pass_size_interdex", m_run_interdex);
m_debug = json.get("after_pass_size_debug", m_debug);
json.get("after_pass_size_queue", m_max_jobs, m_max_jobs);
}
bool handle(PassManager::PassInfo* pass_info,
DexStoresVector* stores,
ConfigFiles* conf) {
if (!m_enabled) {
return false;
}
#ifdef __linux__
for (;;) {
check_open_jobs(/*no_hang=*/true);
if (m_open_jobs.size() < m_max_jobs) {
break;
}
sleep(1); // Wait a bit.
}
// Create a temp dir.
std::string tmp_dir;
{
auto tmp_path = boost::filesystem::temp_directory_path();
tmp_path /= "redex.after_pass_size.XXXXXX";
const auto& tmp_str = tmp_path.string();
std::unique_ptr<char[]> c_str =
std::make_unique<char[]>(tmp_str.length() + 1);
strcpy(c_str.get(), tmp_str.c_str());
char* dir_name = mkdtemp(c_str.get());
if (dir_name == nullptr) {
std::cerr << "Could not create temporary directory!";
return false;
}
tmp_dir = dir_name;
}
pid_t p = fork();
if (p < 0) {
std::cerr << "Fork failed!" << strerror(errno) << std::endl;
return false;
}
if (p > 0) {
// Parent (=this).
m_open_jobs.emplace_back(pass_info, tmp_dir, p);
return false;
}
// Child.
return handle_child(tmp_dir, stores, conf);
#else
(void)pass_info;
return false;
#endif
}
void wait() {
#ifdef __linux__
check_open_jobs(/*no_hang=*/false);
#endif
}
private:
#ifdef __linux__
void check_open_jobs(bool no_hang) {
for (auto it = m_open_jobs.begin(); it != m_open_jobs.end();) {
int stat;
pid_t wait_res;
for (;;) {
wait_res = waitpid(it->pid, &stat, no_hang ? WNOHANG : 0);
if (wait_res != -1 || errno != EINTR) {
break;
}
}
if (wait_res == 0) {
// Not done.
++it;
continue;
}
if (wait_res == -1) {
std::cerr << "Failed " << it->pass_info->name << std::endl;
} else {
if (WIFEXITED(stat) && WEXITSTATUS(stat) == 0) {
handle_parent(*it);
} else {
std::cerr << "AfterPass child failed: " << std::hex << stat
<< std::dec << std::endl;
}
}
boost::filesystem::remove_all(it->tmp_dir);
it = m_open_jobs.erase(it);
}
}
void handle_parent(const Job& job) {
// Collect dex file sizes in the temp directory.
// Discover dex files
namespace fs = boost::filesystem;
auto end = fs::directory_iterator();
size_t sum{0};
for (fs::directory_iterator it{job.tmp_dir}; it != end; ++it) {
const auto& file = it->path();
if (fs::is_regular_file(file) &&
!file.extension().compare(std::string(".dex"))) {
sum += fs::file_size(file);
}
}
job.pass_info->metrics["after_pass_size"] = sum;
if (m_debug) {
std::cerr << "Got " << sum << " for " << job.pass_info->name << std::endl;
}
}
bool handle_child(const std::string& tmp_dir,
DexStoresVector* stores,
ConfigFiles* conf) {
// Change output directory.
if (m_debug) {
std::cerr << "After-pass-size to " << tmp_dir << std::endl;
}
conf->set_outdir(tmp_dir);
// Close output. No noise. (Maybe make this configurable)
if (!m_debug) {
close(STDOUT_FILENO);
close(STDERR_FILENO);
}
auto maybe_run = [&](const char* pass_name) {
auto pass = m_mgr->find_pass(pass_name);
if (pass != nullptr) {
if (m_debug) {
std::cerr << "Running " << pass_name << std::endl;
}
pass->run_pass(*stores, *conf, *m_mgr);
}
};
// If configured with InterDexPass, better run that. Expensive, but may be
// required for dex constraints.
if (m_run_interdex && !m_mgr->interdex_has_run()) {
maybe_run("InterDexPass");
}
// Better run MakePublicPass.
maybe_run("MakePublicPass");
// May need register allocation.
if (!m_mgr->regalloc_has_run()) {
maybe_run("RegAllocPass");
}
// Ensure we do not wait for anything copied from the parent.
m_open_jobs.clear();
m_enabled = false;
// Make the PassManager skip further passes.
return true;
}
#endif
};
// Runs the dex-scope assessor over `scope` and publishes each assessment
// value as a metric on `pm`. Metric names are prefixed with "~assessment~",
// or "~PREassessment~" when `initially` is set.
void run_assessor(PassManager& pm, const Scope& scope, bool initially = false) {
  TRACE(PM, 2, "Running assessor...");
  Timer t("Assessor");

  assessments::DexScopeAssessor assessor(scope);
  auto assessment = assessor.run();

  std::string metric_prefix("~");
  if (initially) {
    metric_prefix += "PRE";
  }
  metric_prefix += "assessment~";

  // log metric value in a way that fits into JSON number value
  for (auto& entry : assessments::order(assessment)) {
    pm.set_metric(metric_prefix + entry.first, entry.second);
  }
}
// For debugging purposes, allows dumping one class after each pass.
//
// The environment variable TRACE_CLASS_NAME names the class to dump.
// TRACE_CLASS_FILE selects the output: a value that is entirely an integer is
// used as an already-open file descriptor, anything else as a file name. If
// it is unset or cannot be opened, output goes to stderr.
class TraceClassAfterEachPass {
 public:
  TraceClassAfterEachPass() {
    m_trace_class_file = getenv("TRACE_CLASS_FILE");
    m_trace_class_name = getenv("TRACE_CLASS_NAME");
    std::cerr << "TRACE_CLASS_FILE="
              << (m_trace_class_file == nullptr ? "" : m_trace_class_file)
              << std::endl;
    std::cerr << "TRACE_CLASS_NAME="
              << (m_trace_class_name == nullptr ? "" : m_trace_class_name)
              << std::endl;
    // The output is only opened when there is a class to trace.
    if (m_trace_class_name == nullptr || m_trace_class_file == nullptr) {
      return;
    }
    m_out = open_output(m_trace_class_file);
    if (!m_out) {
      fprintf(stderr,
              "Unable to open TRACE_CLASS_FILE, falling back to stderr\n");
      m_out = stderr;
    }
  }

  // The object owns m_out (unless it is stderr), so it must not be copied.
  TraceClassAfterEachPass(const TraceClassAfterEachPass&) = delete;
  TraceClassAfterEachPass& operator=(const TraceClassAfterEachPass&) = delete;

  ~TraceClassAfterEachPass() {
    if (m_out != stderr) {
      fclose(m_out);
    }
  }

  // Writes the class, its fields, and its methods (with code, if any).
  void dump_cls(DexClass* cls) {
    fprintf(m_out, "Class %s\n", SHOW(cls));
    std::vector<DexMethod*> methods = cls->get_all_methods();
    std::vector<DexField*> fields = cls->get_all_fields();
    for (auto* v : fields) {
      fprintf(m_out, "Field %s\n", SHOW(v));
    }
    for (auto* v : methods) {
      fprintf(m_out, "Method %s\n", SHOW(v));
      if (v->get_code()) {
        fprintf(m_out, "%s\n", SHOW(v->get_code()));
      }
    }
  }

  // Dumps the traced class, labelled with `pass_name`. Does nothing when
  // TRACE_CLASS_NAME is not set.
  void dump(const std::string& pass_name) {
    if (m_trace_class_name == nullptr) {
      return;
    }
    fprintf(m_out, "After Pass %s\n", pass_name.c_str());
    auto* typ = DexType::get_type(m_trace_class_name);
    auto* cls = typ ? type_class(typ) : nullptr;
    if (cls) {
      dump_cls(cls);
    } else {
      fprintf(m_out, "Class = %s not found\n", m_trace_class_name);
    }
  }

 private:
  // Opens the output described by `spec`; returns nullptr on failure.
  //
  // `spec` counts as a file descriptor only if the *whole* string parses as
  // an int; a name such as "1trace.txt" (numeric prefix) or a number too
  // large for int is opened as a regular file instead.
  static FILE* open_output(const char* spec) {
    const std::string spec_str(spec);
    try {
      size_t consumed = 0;
      const int int_fd = std::stoi(spec_str, &consumed);
      if (consumed == spec_str.size()) {
        return fdopen(int_fd, "w");
      }
    } catch (const std::invalid_argument&) {
      // Not an integer file descriptor; real file name.
    } catch (const std::out_of_range&) {
      // Digits only, but not representable as an int; real file name.
    }
    return fopen(spec, "w");
  }

  FILE* m_out = stderr;
  const char* m_trace_class_file;
  const char* m_trace_class_name;
};

static TraceClassAfterEachPass trace_cls;
} // namespace
std::unique_ptr<keep_rules::ProguardConfiguration> empty_pg_config() {
return std::make_unique<keep_rules::ProguardConfiguration>();
}
// Convenience overload: empty JSON object as config and default-constructed
// options.
PassManager::PassManager(const std::vector<Pass*>& passes)
: PassManager(passes, Json::Value(Json::objectValue), RedexOptions{}) {}
// Convenience overload: uses the configuration returned by empty_pg_config().
PassManager::PassManager(const std::vector<Pass*>& passes,
const Json::Value& config,
const RedexOptions& options)
: PassManager(passes, empty_pg_config(), config, options) {}
// Convenience overload: takes ownership of `pg_config`; empty JSON object as
// config and default-constructed options.
PassManager::PassManager(
const std::vector<Pass*>& passes,
std::unique_ptr<keep_rules::ProguardConfiguration> pg_config)
: PassManager(passes,
std::move(pg_config),
Json::Value(Json::objectValue),
RedexOptions{}) {}
// Primary constructor; the other overloads delegate here.
//
// Registers `passes`, takes ownership of `pg_config`, and activates passes
// according to `config` (see init()). If the MALLOC_PROFILE_PASS environment
// variable is set, it must name a known pass, which is then remembered as the
// pass to run the jemalloc profiler for.
PassManager::PassManager(
    const std::vector<Pass*>& passes,
    std::unique_ptr<keep_rules::ProguardConfiguration> pg_config,
    const Json::Value& config,
    const RedexOptions& options)
    : m_asset_mgr(get_apk_dir(config)),
      m_registered_passes(passes),
      m_current_pass_info(nullptr),
      m_pg_config(std::move(pg_config)),
      m_redex_options(options),
      m_testing_mode(false) {
  init(config);

  const char* malloc_profile_pass_name = getenv("MALLOC_PROFILE_PASS");
  if (malloc_profile_pass_name != nullptr) {
    m_malloc_profile_pass = find_pass(malloc_profile_pass_name);
    always_assert(m_malloc_profile_pass != nullptr);
    fprintf(stderr, "Will run jemalloc profiler for %s\n",
            m_malloc_profile_pass->name().c_str());
  }
}
// Nothing to do beyond destroying the members; defined out of line.
PassManager::~PassManager() = default;
// Determines the activated passes and fills in m_pass_info.
//
// If `config` has a "redex.passes" list, the passes named there are activated
// in order, skipping any whose own config section sets "disabled" to true.
// Without such a list, every registered pass is activated and handed its
// config section directly.
void PassManager::init(const Json::Value& config) {
  if (config["redex"].isMember("passes")) {
    const auto& redex = config["redex"];
    // Bind by reference: copying would deep-copy the whole JSON array.
    const auto& passes_from_config = redex["passes"];
    for (const auto& pass : passes_from_config) {
      std::string pass_name = pass.asString();

      // Check whether it is explicitly disabled.
      auto is_disabled = [&config, &pass_name]() {
        if (!config.isMember(pass_name)) {
          return false;
        }
        const auto& pass_data = config[pass_name];
        if (!pass_data.isMember("disabled")) {
          return false;
        }
        return pass_data["disabled"].asBool();
      };
      if (is_disabled()) {
        continue;
      }

      activate_pass(pass_name, config);
    }
  } else {
    // If config isn't set up, run all registered passes.
    m_activated_passes = m_registered_passes;
    // But do not forget to initialize them.
    for (auto* pass : m_activated_passes) {
      pass->parse_config(JsonWrapper(config[pass->name()]));
    }
  }

  // Count how many times each pass object appears in the activated list.
  std::unordered_map<const Pass*, size_t> pass_repeats;
  for (const Pass* pass : m_activated_passes) {
    ++pass_repeats[pass];
  }

  // Init m_pass_info. `repeat` is the zero-based occurrence index of the
  // pass; the display name uses the one-based index ("Name#1", "Name#2", ...).
  std::unordered_map<const Pass*, size_t> pass_counters;
  m_pass_info.resize(m_activated_passes.size());
  for (size_t i = 0; i < m_activated_passes.size(); ++i) {
    Pass* pass = m_activated_passes[i];
    const size_t count = pass_counters[pass]++;
    auto& info = m_pass_info[i];
    info.pass = pass;
    info.order = i;
    info.repeat = count;
    info.total_repeat = pass_repeats.at(pass);
    info.name = pass->name() + "#" + std::to_string(count + 1);
    info.metrics[PASS_ORDER_KEY] = i;
    info.config = JsonWrapper(config[pass->name()]);
  }
}
// Hashes `scope` and returns the result.
//
// When `pass_name` is non-null, the four component hashes are additionally
// stored as metrics, truncated to their low 52 bits so that they fit into a
// JSON number value. The hashes are always traced; a null `pass_name` is
// shown as "(initial)". Time spent is accounted to m_hashers_timer.
hashing::DexHash PassManager::run_hasher(const char* pass_name,
                                         const Scope& scope) {
  TRACE(PM, 2, "Running hasher...");
  Timer t("Hasher");
  auto timer = m_hashers_timer.scope();
  hashing::DexScopeHasher hasher(scope);
  auto hash = hasher.run();
  if (pass_name) {
    // log metric value in a way that fits into JSON number value.
    // The mask is built in 64 bits so the shift is well-defined regardless of
    // the width of size_t.
    constexpr uint64_t json_number_mask = (uint64_t{1} << 52) - 1;
    set_metric("~result~code~hash~", hash.code_hash & json_number_mask);
    set_metric("~result~registers~hash~",
               hash.registers_hash & json_number_mask);
    set_metric("~result~positions~hash~",
               hash.positions_hash & json_number_mask);
    set_metric("~result~signature~hash~",
               hash.signature_hash & json_number_mask);
  }
  auto positions_hash_string = hashing::hash_to_string(hash.positions_hash);
  auto registers_hash_string = hashing::hash_to_string(hash.registers_hash);
  auto code_hash_string = hashing::hash_to_string(hash.code_hash);
  auto signature_hash_string = hashing::hash_to_string(hash.signature_hash);
  TRACE(PM, 3,
        "[scope hash] %s: positions#%s, registers#%s, code#%s, signature#%s",
        pass_name ? pass_name : "(initial)", positions_hash_string.c_str(),
        registers_hash_string.c_str(), code_hash_string.c_str(),
        signature_hash_string.c_str());
  return hash;
}
// Calls eval_pass() on every activated pass, in order. While a pass is being
// evaluated, m_current_pass_info points at its PassInfo entry; it is reset to
// null afterwards.
void PassManager::eval_passes(DexStoresVector& stores, ConfigFiles& conf) {
  for (size_t idx = 0; idx < m_activated_passes.size(); ++idx) {
    Pass* const current = m_activated_passes[idx];
    const auto& pass_name = current->name();

    TRACE(PM, 1, "Evaluating %s...", pass_name.c_str());
    Timer t(pass_name + " (eval)");

    m_current_pass_info = &m_pass_info[idx];
    current->eval_pass(stores, conf, *this);
    m_current_pass_info = nullptr;
  }
}
// Runs every activated pass, in order, over `stores`.
//
// Before the main loop this performs one-time setup: optional squashing into a
// single dex, API level checker initialization, incoming seeds/hashes output,
// loading of scope-dependent configuration, pass evaluation (eval_passes), and
// retrieval of the hasher / assessor / type checker settings. Each loop
// iteration runs one pass and then the post-pass verifiers. After the loop the
// type checker is always run, followed by the outgoing reports and timer
// registration.
void PassManager::run_passes(DexStoresVector& stores, ConfigFiles& conf) {
// Optional command profiling, configured through the environment: one
// profiler that is attached to a single selected pass, and one that is
// attached to every pass (see the main loop below).
auto profiler_info = ScopedCommandProfiling::maybe_info_from_env("");
const Pass* profiler_info_pass = nullptr;
if (profiler_info) {
profiler_info_pass = get_profiled_pass(*this);
}
auto profiler_all_info =
ScopedCommandProfiling::maybe_info_from_env("ALL_PASSES_");
if (conf.force_single_dex()) {
// Squash the dexes into one, so that the passes all see only one dex and
// all the cross-dex reference checking are accurate.
squash_into_one_dex(stores);
}
DexStoreClassesIterator it(stores);
Scope scope = build_class_scope(it);
// Clear stale data. Make sure we start fresh.
m_preserved_analysis_passes.clear();
{
Timer t("API Level Checker");
api::LevelChecker::init(m_redex_options.min_sdk, scope);
}
maybe_write_env_seeds_file(conf, scope);
maybe_print_seeds_incoming(conf, scope, m_pg_config);
maybe_write_hashes_incoming(conf, scope);
maybe_enable_opt_data(conf);
// Load configurations regarding the scope.
conf.load(scope);
sanitizers::lsan_do_recoverable_leak_check();
eval_passes(stores, conf);
// Retrieve the hasher's settings.
bool run_hasher_after_each_pass =
is_run_hasher_after_each_pass(conf, get_redex_options());
// Retrieve the assessor's settings.
m_assessor_config = ::get_assessor_config(conf, get_redex_options());
const auto& assessor_config = this->get_assessor_config();
// Retrieve the type checker's settings.
CheckerConfig checker_conf{conf};
checker_conf.on_input(scope);
// Pull on method-profiles, so that they get initialized, and are matched
// against the *initial* methods
conf.get_method_profiles();
// Record the hash of the unmodified input; a null pass name marks this as
// the initial run.
if (run_hasher_after_each_pass) {
m_initial_hash = run_hasher(nullptr, scope);
}
CheckUniqueDeobfuscatedNames check_unique_deobfuscated{conf};
check_unique_deobfuscated.run_initially(scope);
VisualizerHelper graph_visualizer(conf);
sanitizers::lsan_do_recoverable_leak_check();
// Memory high-water-mark settings handed to ScopedVmHWM for each pass; they
// come from STATS tracing and the "mem_stats" / "mem_stats_per_pass" JSON
// config keys.
const bool hwm_pass_stats =
traceEnabled(STATS, 1) || conf.get_json_config().get("mem_stats", true);
const bool hwm_per_pass =
conf.get_json_config().get("mem_stats_per_pass", true);
// Index of the first pass after which dex reference validation is performed
// (see the end of post_pass_verifiers).
size_t min_pass_idx_for_dex_ref_check =
checker_conf.min_pass_idx_for_dex_ref_check(m_activated_passes);
// Abort if the analysis pass dependencies are not satisfied.
AnalysisUsage::check_dependencies(m_activated_passes);
AfterPassSizes after_pass_size(this, conf);
// For core loop legibility, have a lambda here.
// Called before each pass; it only acts before the first pass (i == 0), where
// it runs the initial assessment if configured. `pass` is currently unused.
auto pre_pass_verifiers = [&](Pass* pass, size_t i) {
if (i == 0 && assessor_config.run_initially) {
::run_assessor(*this, scope, /* initially */ true);
}
};
// Called after each pass. `i` is the index of the pass that just ran and
// `size` the total number of activated passes.
auto post_pass_verifiers = [&](Pass* pass, size_t i, size_t size) {
// Only populated when slow_invariants_debug is set.
ConcurrentSet<const DexMethodRef*> all_code_referenced_methods;
walk::parallel::code(build_class_scope(stores), [&](DexMethod* m,
IRCode& code) {
// Ensure that pass authors deconstructed the editable CFG at the end of
// their pass. Currently, passes assume the incoming code will be in
// IRCode form
always_assert_log(!code.editable_cfg_built(), "%s has a cfg!", SHOW(m));
if (slow_invariants_debug) {
// Check that the method itself and every method reference gathered from
// its code can still be looked up via DexMethod::get_method.
std::vector<DexMethodRef*> methods;
methods.reserve(1000);
methods.push_back(m);
code.gather_methods(methods);
for (auto* mref : methods) {
always_assert_log(
DexMethod::get_method(mref->get_class(), mref->get_name(),
mref->get_proto()) != nullptr,
"Did not find %s in the context, referenced from %s!", SHOW(mref),
SHOW(m));
all_code_referenced_methods.insert(mref);
}
}
});
if (slow_invariants_debug) {
ScopedMetrics sm(*this);
sm.set_metric("num_code_referenced_methods",
all_code_referenced_methods.size());
}
// Decide which optional checks run after this pass. The "finally"
// assessment is tied to the last pass (i == size - 1).
bool run_hasher = run_hasher_after_each_pass;
bool run_assessor = assessor_config.run_after_each_pass ||
(assessor_config.run_finally && i == size - 1);
bool run_type_checker = checker_conf.run_after_pass(pass);
if (run_hasher || run_assessor || run_type_checker ||
check_unique_deobfuscated.m_after_each_pass) {
// `scope` is captured by reference, so this refreshes the enclosing
// function's scope as well.
scope = build_class_scope(it);
if (run_hasher) {
m_current_pass_info->hash = boost::optional<hashing::DexHash>(
this->run_hasher(pass->name().c_str(), scope));
}
if (run_assessor) {
::run_assessor(*this, scope);
ScopedMetrics sm(*this);
source_blocks::track_source_block_coverage(sm, stores);
}
if (run_type_checker) {
// It's OK to overwrite the `this` register if we are not yet at the
// output phase -- the register allocator can fix it up later.
checker_conf.check_no_overwrite_this(false)
.validate_access(false)
.run_verifier(scope);
}
// Called whenever this branch is taken, not only when m_after_each_pass
// is set. NOTE(review): presumably run_after_pass checks that flag
// itself -- confirm.
auto timer = m_check_unique_deobfuscateds_timer.scope();
check_unique_deobfuscated.run_after_pass(pass, scope);
}
if (i >= min_pass_idx_for_dex_ref_check) {
CheckerConfig::ref_validation(stores, pass->name());
}
};
JNINativeContextHelper jni_native_context_helper(
scope, m_redex_options.jni_summary_path);
// Number of times each Pass object has been run so far; used to label the
// per-run timer below.
std::unordered_map<const Pass*, size_t> runs;
/////////////////////
// MAIN PASS LOOP. //
/////////////////////
for (size_t i = 0; i < m_activated_passes.size(); ++i) {
Pass* pass = m_activated_passes[i];
const size_t pass_run = ++runs[pass];
AnalysisUsageHelper analysis_usage_helper{m_preserved_analysis_passes};
analysis_usage_helper.pre_pass(pass);
TRACE(PM, 1, "Running %s...", pass->name().c_str());
ScopedVmHWM vm_hwm{hwm_pass_stats, hwm_per_pass};
Timer t(pass->name() + " " + std::to_string(pass_run) + " (run)");
// Metrics set by the pass and by the verifiers are attributed to this entry.
m_current_pass_info = &m_pass_info[i];
pre_pass_verifiers(pass, i);
{
// The profilers live only for this inner scope, i.e. around run_pass and
// the trace_cls dump. The single-pass profiler is created only for the
// pass selected for profiling.
auto scoped_command_prof = profiler_info_pass == pass
? ScopedCommandProfiling::maybe_from_info(
profiler_info, &pass->name())
: boost::none;
auto scoped_command_all_prof = ScopedCommandProfiling::maybe_from_info(
profiler_all_info, &pass->name());
jemalloc_util::ScopedProfiling malloc_prof(m_malloc_profile_pass == pass);
pass->run_pass(stores, conf, *this);
trace_cls.dump(pass->name());
}
vm_hwm.trace_log(this, pass);
sanitizers::lsan_do_recoverable_leak_check();
graph_visualizer.add_pass(pass, i);
post_pass_verifiers(pass, i, m_activated_passes.size());
analysis_usage_helper.post_pass(pass);
process_method_profiles(*this, conf);
// Note that breaking out here leaves m_current_pass_info pointing at the
// current pass.
if (after_pass_size.handle(m_current_pass_info, &stores, &conf)) {
// Measuring child. Return to write things out.
break;
}
m_current_pass_info = nullptr;
}
// NOTE(review): presumably waits for outstanding size-measuring children --
// confirm against AfterPassSizes.
after_pass_size.wait();
// Always run the type checker before generating the optimized dex code.
scope = build_class_scope(it);
checker_conf.check_no_overwrite_this(get_redex_options().no_overwrite_this())
.validate_access(true)
.run_verifier(scope);
jni_native_context_helper.post_passes(scope, conf);
check_unique_deobfuscated.run_finally(scope);
graph_visualizer.finalize();
maybe_print_seeds_outgoing(conf, it);
maybe_write_hashes_outgoing(conf, scope);
sanitizers::lsan_do_recoverable_leak_check();
// Publish the accumulated times under named timers.
Timer::add_timer("PassManager.Hashers", m_hashers_timer.get_seconds());
Timer::add_timer("PassManager.CheckUniqueDeobfuscateds",
m_check_unique_deobfuscateds_timer.get_seconds());
Timer::add_timer("CFGMutation", cfg::CFGMutation::get_seconds());
}
// Appends the registered pass called `name` to the list of activated passes
// and lets it parse the configuration stored under `conf[name]`. Fails via
// not_reached_log if no registered pass has that name.
void PassManager::activate_pass(const std::string& name,
                                const Json::Value& conf) {
  // A "#<id>" suffix, if present, only encodes the position in the pass list;
  // it is dropped for matching but kept for the configuration lookup.
  const std::string base_name = name.substr(0, name.find('#'));
  const auto match = std::find_if(m_registered_passes.begin(),
                                  m_registered_passes.end(),
                                  [&base_name](const Pass* candidate) {
                                    return base_name == candidate->name();
                                  });
  if (match != m_registered_passes.end()) {
    Pass* const selected = *match;
    m_activated_passes.push_back(selected);
    // The configuration is specific to this particular run of the pass.
    selected->parse_config(JsonWrapper(conf[name]));
    return;
  }
  not_reached_log("No pass named %s!", name.c_str());
}
// Returns the first activated pass whose name equals `pass_name`, or nullptr
// if no activated pass has that name.
Pass* PassManager::find_pass(const std::string& pass_name) const {
  for (Pass* candidate : m_activated_passes) {
    if (candidate->name() == pass_name) {
      return candidate;
    }
  }
  return nullptr;
}
// Adds `value` to the metric `key` of the pass currently being processed.
// Asserts that there is a current pass.
void PassManager::incr_metric(const std::string& key, int64_t value) {
  always_assert_log(m_current_pass_info != nullptr, "No current pass!");
  auto& current_metrics = m_current_pass_info->metrics;
  current_metrics[key] += value;
}
// Overwrites the metric `key` of the pass currently being processed with
// `value`. Asserts that there is a current pass.
void PassManager::set_metric(const std::string& key, int64_t value) {
  always_assert_log(m_current_pass_info != nullptr, "No current pass!");
  auto& metric_slot = m_current_pass_info->metrics[key];
  metric_slot = value;
}
int64_t PassManager::get_metric(const std::string& key) {
return (m_current_pass_info->metrics)[key];
}
const std::vector<PassManager::PassInfo>& PassManager::get_pass_info() const {
return m_pass_info;
}
const std::unordered_map<std::string, int64_t>&
PassManager::get_interdex_metrics() {
for (const auto& pass_info : m_pass_info) {
if (pass_info.pass->name() == "InterDexPass") {
return pass_info.metrics;
}
}
static std::unordered_map<std::string, int64_t> empty;
return empty;
}