source/Options.cpp (410 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <optional>
#include <boost/algorithm/string.hpp>
#include <fmt/format.h>
#include <mariana-trench/JsonValidation.h>
#include <mariana-trench/Log.h>
#include <mariana-trench/Options.h>
namespace marianatrench {
namespace {
std::string check_path_exists(const std::string& path) {
if (!boost::filesystem::exists(path)) {
throw std::invalid_argument(fmt::format("File `{}` does not exist.", path));
}
return path;
}
std::string check_directory_exists(const std::string& path) {
if (!boost::filesystem::is_directory(path)) {
throw std::invalid_argument(
fmt::format("Directory `{}` does not exist.", path));
}
return path;
}
/* Parse a ';'-separated list of files or directories. */
std::vector<std::string> parse_paths_list(
const std::string& input,
const std::optional<std::string>& extension,
bool check_exist = true) {
std::vector<std::string> input_paths;
boost::split(input_paths, input, boost::is_any_of(",;"));
std::vector<std::string> paths;
for (const auto& path : input_paths) {
if (boost::filesystem::is_directory(path)) {
for (const auto& entry : boost::make_iterator_range(
boost::filesystem::directory_iterator(path), {})) {
if (!extension || entry.path().extension() == *extension) {
paths.push_back(entry.path().native());
}
}
} else if (boost::filesystem::exists(path)) {
paths.push_back(path);
} else if (!check_exist) {
WARNING(2, "Argument path does not exist: `{}`", path);
paths.push_back(path);
} else {
throw std::invalid_argument(
fmt::format("File `{}` does not exist.", path));
}
}
return paths;
}
std::vector<std::string> parse_search_paths(const std::string& input) {
std::vector<std::string> paths;
boost::split(paths, input, boost::is_any_of(",;"));
for (const auto& path : paths) {
if (!boost::filesystem::is_directory(path)) {
throw std::invalid_argument(
fmt::format("Directory `{}` does not exist.", path));
}
}
return paths;
}
std::vector<ModelGeneratorConfiguration> parse_json_configuration_files(
const std::vector<std::string>& paths) {
std::vector<ModelGeneratorConfiguration> result;
for (const auto& path : paths) {
Json::Value json = JsonValidation::parse_json_file(path);
for (const auto& value : JsonValidation::null_or_array(json)) {
result.push_back(ModelGeneratorConfiguration::from_json(value));
}
}
return result;
}
} // namespace
namespace program_options = boost::program_options;
Options::Options(
const std::vector<std::string>& models_paths,
const std::vector<std::string>& field_models_paths,
const std::vector<std::string>& rules_paths,
const std::vector<std::string>& lifecycles_paths,
const std::vector<std::string>& proguard_configuration_paths,
bool sequential,
bool skip_source_indexing,
bool skip_model_generation,
const std::vector<ModelGeneratorConfiguration>&
model_generators_configuration,
const std::vector<std::string>& model_generator_search_paths,
bool remove_unreachable_code,
const std::string& source_root_directory)
: models_paths_(models_paths),
field_models_paths_(field_models_paths),
rules_paths_(rules_paths),
lifecycles_paths_(lifecycles_paths),
proguard_configuration_paths_(proguard_configuration_paths),
model_generators_configuration_(model_generators_configuration),
model_generator_search_paths_(model_generator_search_paths),
source_root_directory_(source_root_directory),
sequential_(sequential),
skip_source_indexing_(skip_source_indexing),
skip_model_generation_(skip_model_generation),
remove_unreachable_code_(remove_unreachable_code),
disable_parameter_type_overrides_(false),
maximum_method_analysis_time_(std::nullopt),
maximum_source_sink_distance_(10),
dump_class_hierarchies_(false),
dump_overrides_(false),
dump_call_graph_(false),
dump_dependencies_(false),
dump_methods_(false) {}
Options::Options(const boost::program_options::variables_map& variables) {
system_jar_paths_ = parse_paths_list(
variables["system-jar-paths"].as<std::string>(),
std::nullopt,
/* check exist */ false);
apk_directory_ =
check_directory_exists(variables["apk-directory"].as<std::string>());
dex_directory_ =
check_directory_exists(variables["dex-directory"].as<std::string>());
if (!variables["models-paths"].empty()) {
models_paths_ = parse_paths_list(
variables["models-paths"].as<std::string>(), /* extension */ ".json");
}
rules_paths_ = parse_paths_list(
variables["rules-paths"].as<std::string>(), /* extension */ ".json");
if (!variables["lifecycles-paths"].empty()) {
lifecycles_paths_ = parse_paths_list(
variables["lifecycles-paths"].as<std::string>(),
/* extension */ ".json");
}
if (!variables["proguard-configuration-paths"].empty()) {
proguard_configuration_paths_ = parse_paths_list(
variables["proguard-configuration-paths"].as<std::string>(),
/* extension */ ".pro");
}
if (!variables["generated-models-directory"].empty()) {
generated_models_directory_ = check_path_exists(
variables["generated-models-directory"].as<std::string>());
}
generator_configuration_paths_ = parse_paths_list(
variables["model-generator-configuration-paths"].as<std::string>(),
/* extension */ ".json");
model_generators_configuration_ =
parse_json_configuration_files(generator_configuration_paths_);
if (!variables["model-generator-search-paths"].empty()) {
model_generator_search_paths_ = parse_search_paths(
variables["model-generator-search-paths"].as<std::string>());
}
repository_root_directory_ = check_directory_exists(
variables["repository-root-directory"].as<std::string>());
source_root_directory_ = check_directory_exists(
variables["source-root-directory"].as<std::string>());
if (!variables["source-exclude-directories"].empty()) {
source_exclude_directories_ = parse_paths_list(
variables["source-exclude-directories"].as<std::string>(),
/* extension */ std::nullopt);
}
apk_path_ = check_path_exists(variables["apk-path"].as<std::string>());
output_directory_ = boost::filesystem::path(
check_directory_exists(variables["output-directory"].as<std::string>()));
sequential_ = variables.count("sequential") > 0;
skip_source_indexing_ = variables.count("skip-source-indexing") > 0;
skip_model_generation_ = variables.count("skip-model-generation") > 0;
disable_parameter_type_overrides_ =
variables.count("disable-parameter-type-overrides") > 0;
remove_unreachable_code_ = variables.count("remove-unreachable-code") > 0;
maximum_method_analysis_time_ =
variables.count("maximum-method-analysis-time") == 0
? std::nullopt
: std::make_optional<int>(
variables["maximum-method-analysis-time"].as<int>());
maximum_source_sink_distance_ =
variables["maximum-source-sink-distance"].as<int>();
if (!variables["log-method"].empty()) {
log_methods_ = variables["log-method"].as<std::vector<std::string>>();
}
dump_class_hierarchies_ = variables.count("dump-class-hierarchies") > 0;
dump_overrides_ = variables.count("dump-overrides") > 0;
dump_call_graph_ = variables.count("dump-call-graph") > 0;
dump_dependencies_ = variables.count("dump-dependencies") > 0;
dump_methods_ = variables.count("dump-methods") > 0;
job_id_ = variables.count("job-id") == 0
? std::nullopt
: std::make_optional<std::string>(variables["job-id"].as<std::string>());
metarun_id_ = variables.count("metarun-id") == 0
? std::nullopt
: std::make_optional<std::string>(
variables["metarun-id"].as<std::string>());
}
void Options::add_options(
boost::program_options::options_description& options) {
options.add_options()(
"system-jar-paths",
program_options::value<std::string>()->required(),
"A JSON configuration file with a list of paths to the system jars.");
options.add_options()(
"apk-directory",
program_options::value<std::string>()->required(),
"The extraced APK obtained by `redex -u`.");
options.add_options()(
"dex-directory",
program_options::value<std::string>()->required(),
"The extraced DEX obtained by `redex -u`.");
options.add_options()(
"models-paths",
program_options::value<std::string>(),
"A `;` separated list of models files and directories containing models files.");
options.add_options()(
"rules-paths",
program_options::value<std::string>()->required(),
"A `;` separated list of rules files and directories containing rules files.");
options.add_options()(
"proguard-configuration-paths",
program_options::value<std::string>(),
"A `;` separated list of ProGuard configuration files or directories containing ProGuard configuration files.");
options.add_options()(
"lifecycles-paths",
program_options::value<std::string>(),
"A `;` separated list of files and directories containing life-cycles files.");
options.add_options()(
"generated-models-directory",
program_options::value<std::string>(),
"Directory where generated models will be stored.");
options.add_options()(
"model-generator-configuration-paths",
program_options::value<std::string>()->required(),
"A `;` separated list of JSON configuration files each specifying a list of absolute paths to JSON model generators or names of CPP model generators.");
options.add_options()(
"model-generator-search-paths",
program_options::value<std::string>(),
"A `;` separated list of paths where we look for JSON model generators.");
options.add_options()(
"repository-root-directory",
program_options::value<std::string>()->required(),
"The root of the repository.");
options.add_options()(
"source-root-directory",
program_options::value<std::string>()->required(),
"The root where source files for the APK can be found.");
options.add_options()(
"source-exclude-directories",
program_options::value<std::string>(),
"A `;`-separated list of directories that should be excluded from indexed source files.");
options.add_options()(
"apk-path",
program_options::value<std::string>()->required(),
"The APK to analyze.");
options.add_options()(
"output-directory",
program_options::value<std::string>()->required(),
"Directory to write results in.");
options.add_options()(
"sequential", "Run the global fixpoint without parallelization.");
options.add_options()(
"skip-source-indexing", "Skip indexing java source files.");
options.add_options()(
"skip-model-generation", "Skip running model generation.");
options.add_options()(
"disable-parameter-type-overrides",
"Disable analyzing methods with specific parameter type information.");
options.add_options()(
"remove-unreachable-code",
"Prune unreachable code based on entry points specified in proguard configuration.");
options.add_options()(
"maximum-method-analysis-time",
program_options::value<int>(),
"Specify number of seconds as a bound. If the analysis of a method takes longer than this then make the method obscure (default taint-in-taint-out).");
options.add_options()(
"maximum-source-sink-distance",
program_options::value<int>(),
"Limits the distance of sources and sinks from a trace entry point.");
options.add_options()(
"log-method",
program_options::value<std::vector<std::string>>()->multitoken(),
"Enable logging for the given methods.");
options.add_options()(
"dump-class-hierarchies",
"Dump the class hierarchies in `class_hierarchies.json`.");
options.add_options()(
"dump-overrides", "Dump the override graph in `overrides.json`.");
options.add_options()(
"dump-call-graph", "Dump the call graph in `call_graph.json`.");
options.add_options()(
"dump-dependencies", "Dump the dependency graph in `dependencies.json`.");
options.add_options()(
"dump-methods", "Dump the list of method signatures in `methods.json`.");
options.add_options()(
"job-id",
program_options::value<std::string>(),
"Identifier for the current analysis run.");
options.add_options()(
"metarun-id",
program_options::value<std::string>(),
"Identifier for a group of analysis runs.");
}
const std::vector<std::string>& Options::models_paths() const {
return models_paths_;
}
const std::vector<std::string>& Options::field_models_paths() const {
return field_models_paths_;
}
const std::vector<ModelGeneratorConfiguration>&
Options::model_generators_configuration() const {
return model_generators_configuration_;
}
const std::vector<std::string>& Options::rules_paths() const {
return rules_paths_;
}
const std::vector<std::string>& Options::lifecycles_paths() const {
return lifecycles_paths_;
}
const std::vector<std::string>& Options::proguard_configuration_paths() const {
return proguard_configuration_paths_;
}
const std::optional<std::string>& Options::generated_models_directory() const {
return generated_models_directory_;
}
const std::vector<std::string>& Options::generator_configuration_paths() const {
return generator_configuration_paths_;
}
const std::vector<std::string>& Options::model_generator_search_paths() const {
return model_generator_search_paths_;
}
const std::string& Options::repository_root_directory() const {
return repository_root_directory_;
}
const std::string& Options::source_root_directory() const {
return source_root_directory_;
}
const std::vector<std::string>& Options::source_exclude_directories() const {
return source_exclude_directories_;
}
const std::vector<std::string>& Options::system_jar_paths() const {
return system_jar_paths_;
}
const std::string& Options::apk_directory() const {
return apk_directory_;
}
const std::string& Options::dex_directory() const {
return dex_directory_;
}
const std::string& Options::apk_path() const {
return apk_path_;
}
const boost::filesystem::path Options::metadata_output_path() const {
return output_directory_ / "metadata.json";
}
const boost::filesystem::path Options::removed_symbols_output_path() const {
return output_directory_ / "removed_symbols.json";
}
const boost::filesystem::path Options::models_output_path() const {
return output_directory_;
}
const boost::filesystem::path Options::methods_output_path() const {
return output_directory_ / "methods.json";
}
const boost::filesystem::path Options::call_graph_output_path() const {
return output_directory_ / "call_graph.json";
}
const boost::filesystem::path Options::class_hierarchies_output_path() const {
return output_directory_ / "class_hierarchies.json";
}
const boost::filesystem::path Options::overrides_output_path() const {
return output_directory_ / "overrides.json";
}
const boost::filesystem::path Options::dependencies_output_path() const {
return output_directory_ / "dependencies.json";
}
bool Options::sequential() const {
return sequential_;
}
bool Options::skip_source_indexing() const {
return skip_source_indexing_;
}
bool Options::skip_model_generation() const {
return skip_model_generation_;
}
bool Options::disable_parameter_type_overrides() const {
return disable_parameter_type_overrides_;
}
bool Options::remove_unreachable_code() const {
return remove_unreachable_code_;
}
std::optional<int> Options::maximum_method_analysis_time() const {
return maximum_method_analysis_time_;
}
int Options::maximum_source_sink_distance() const {
return maximum_source_sink_distance_;
}
const std::vector<std::string>& Options::log_methods() const {
return log_methods_;
}
bool Options::dump_class_hierarchies() const {
return dump_class_hierarchies_;
}
bool Options::dump_overrides() const {
return dump_overrides_;
}
bool Options::dump_call_graph() const {
return dump_call_graph_;
}
bool Options::dump_dependencies() const {
return dump_dependencies_;
}
bool Options::dump_methods() const {
return dump_methods_;
}
const std::optional<std::string>& Options::job_id() const {
return job_id_;
}
const std::optional<std::string>& Options::metarun_id() const {
return metarun_id_;
}
} // namespace marianatrench