tools/redex-tool/DexSqlDump.cpp (369 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/*
Workflow:
$ ./native/redex/redex.py -u <APK>
$ buck run //native/redex:redex-tool -- dex-sql-dump \
--apkdir <APKDIR> --dexendir <DEXEN_DIR> \
--jars <ANDROID_JAR> --proguard-map <RENAME_MAP> \
--output dex.sql
$ sqlite3 dex.db < dex.sql
$ sqlite3 dex.db "SELECT COUNT(*) FROM dex;" # verify sane-looking value
$ ./native/redex/tools/redex-tool/DexSqlQuery.py dex.db
<..enter queries..>
*/
#include <boost/algorithm/string/replace.hpp>
#include <queue>
#include <unordered_map>
#include <vector>
#include "ClassHierarchy.h"
#include "ControlFlow.h"
#include "DexOutput.h"
#include "IRCode.h"
#include "Resolver.h"
#include "Show.h"
#include "Tool.h"
#include "Walkers.h"
namespace {
static std::unordered_map<DexClass*, int> class_ids;
static std::unordered_map<DexMethod*, int> method_ids;
static std::unordered_map<DexField*, int> field_ids;
static std::unordered_map<const DexString*, int> string_ids;
void dump_field_refs(FILE* fdout,
const char* prefix,
DexField* field,
int field_id) {
static int next_string_ref = 0;
auto* static_value = field->get_static_value();
if (!static_value || (static_value->evtype() != DEVT_STRING)) return;
auto* static_string_value = static_cast<DexEncodedValueString*>(static_value);
auto string_id = string_ids[static_string_value->string()];
fprintf(fdout,
"INSERT INTO %sfield_string_refs VALUES (%d, %d, %d);\n",
prefix,
next_string_ref++,
field_id,
string_id);
}
void dump_method_refs(FILE* fdout,
const char* prefix,
DexMethod* method,
int method_id) {
auto code = method->get_code();
if (!code) return;
static int next_string_ref = 0;
static int next_class_ref = 0;
static int next_field_ref = 0;
static int next_method_ref = 0;
for (auto& mie : InstructionIterable(code)) {
auto insn = mie.insn;
if (insn->has_string()) {
if (string_ids.count(insn->get_string())) {
auto string_id = string_ids[insn->get_string()];
fprintf(fdout,
"INSERT INTO %smethod_string_refs VALUES (%d, %d, %d, %d);\n",
prefix,
next_string_ref++,
method_id,
string_id,
insn->opcode());
}
}
if (insn->has_type()) {
auto cls = type_class(insn->get_type());
if (cls && class_ids.count(cls)) {
auto class_id = class_ids[cls];
fprintf(fdout,
"INSERT INTO %smethod_class_refs VALUES (%d, %d, %d, %d);\n",
prefix,
next_class_ref++,
method_id,
class_id,
insn->opcode());
}
}
if (insn->has_field()) {
auto field = resolve_field(insn->get_field());
if (field != nullptr && field_ids.count(field)) {
auto field_id = field_ids[field];
fprintf(fdout,
"INSERT INTO %smethod_field_refs VALUES (%d, %d, %d, %d);\n",
prefix,
next_field_ref++,
method_id,
field_id,
insn->opcode());
}
}
if (insn->has_method()) {
auto meth =
resolve_method(insn->get_method(), opcode_to_search(insn), method);
if (meth != nullptr && method_ids.count(meth)) {
auto method_ref_id = method_ids[meth];
fprintf(fdout,
"INSERT INTO %smethod_method_refs VALUES (%d, %d, %d, %d);\n",
prefix,
next_method_ref++,
method_id,
method_ref_id,
insn->opcode());
}
}
}
}
void dump_class(FILE* fdout,
const char* prefix,
const char* dex_id,
DexClass* cls,
int class_id) {
// TODO: annotations?
// TODO: inheritance?
// TODO: string usage
// TODO: size estimate
const auto& deobfuscated_name = cls->get_deobfuscated_name();
fprintf(fdout,
"INSERT INTO %sclasses VALUES (%d,'%s','%s','%s',%u);\n",
prefix,
class_id,
dex_id,
deobfuscated_name.c_str(),
cls->get_name()->c_str(),
cls->get_access());
}
void dump_field(FILE* fdout,
const char* prefix,
int class_id,
DexField* field,
int field_id) {
// TODO: more fixup here on this crapped up name/signature
// TODO: break down signature
// TODO: annotations?
// TODO: string usage (encoded_value for static fields)
const auto& deobfuscated_name = field->get_deobfuscated_name();
auto field_name = strchr(deobfuscated_name.c_str(), ';');
fprintf(fdout,
"INSERT INTO %sfields VALUES(%d, %d, '%s', '%s', %u);\n",
prefix,
field_id,
class_id,
field_name,
field->get_name()->c_str(),
field->get_access());
}
void dump_method(FILE* fdout,
const char* prefix,
int class_id,
DexMethod* method,
int method_id) {
// TODO: more fixup here on this crapped up name/signature
// TODO: break down signature
// TODO: throws?
// TODO: annotations?
// TODO: string usage
// TODO: size estimate
const auto& deobfuscated_name = method->get_deobfuscated_name();
auto method_name = strchr(deobfuscated_name.c_str(), ';');
fprintf(fdout,
"INSERT INTO %smethods VALUES (%d,%d,'%s','%s',%d,%lu);\n",
prefix,
method_id,
class_id,
method_name,
method->get_name()->c_str(),
method->get_access(),
method->get_code() ? method->get_code()->sum_opcode_sizes() : 0);
}
void dump_sql(FILE* fdout,
DexStoresVector& stores,
ProguardMap& pg_map,
const char* prefix) {
fprintf(fdout,
R"___(
DROP TABLE IF EXISTS %1$sfield_string_refs;
DROP TABLE IF EXISTS %1$smethod_string_refs;
DROP TABLE IF EXISTS %1$smethod_field_refs;
DROP TABLE IF EXISTS %1$smethod_method_refs;
DROP TABLE IF EXISTS %1$smethod_class_refs;
DROP TABLE IF EXISTS %1$sstrings;
DROP TABLE IF EXISTS %1$sfields;
DROP TABLE IF EXISTS %1$sis_a;
DROP TABLE IF EXISTS %1$smethods;
DROP TABLE IF EXISTS %1$sclasses;
CREATE TABLE %1$sclasses (
id INTEGER PRIMARY KEY AUTOINCREMENT,
dex TEXT NOT NULL, -- dex identifiers look like "<store>/<dex_id>"
name TEXT NOT NULL,
obfuscated_name TEXT NOT NULL,
access INTEGER NOT NULL
);
CREATE TABLE %1$smethods (
id INTEGER PRIMARY KEY AUTOINCREMENT,
class_id INTEGER, -- fk:classes.id
name TEXT NOT NULL,
obfuscated_name TEXT NOT NULL,
access INTEGER NOT NULL,
code_size INTEGER NOT NULL
);
CREATE TABLE %1$sis_a (
id INTEGER PRIMARY KEY AUTOINCREMENT,
class_id INTEGER, -- fk:classes.id
is_a_class_id INTEGER -- fk:classes.id
);
CREATE TABLE %1$sstrings (
id INTEGER PRIMARY KEY AUTOINCREMENT,
text TEXT NOT NULL
);
CREATE TABLE %1$sfields (
id INTEGER PRIMARY KEY AUTOINCREMENT,
class_id INTEGER, -- fk:classes.id
name TEXT NOT NULL,
obfuscated_name TEXT NOT NULL,
access INTEGER NOT NULL
);
CREATE TABLE %1$sfield_string_refs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
field_id INTEGER NOT NULL, -- fk:fields.id
ref_string_id INTEGER NOT NULL -- fk:strings.id
);
CREATE TABLE %1$smethod_class_refs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
method_id INTEGER, -- fk:methods.id
ref_class_id INTEGER NOT NULL, -- fk:classes.id
opcode INTEGER NOT NULL
);
CREATE TABLE %1$smethod_method_refs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
method_id INTEGER, -- fk:methods.id
ref_method_id INTEGER NOT NULL, -- fk:methods.id
opcode INTEGER NOT NULL
);
CREATE TABLE %1$smethod_field_refs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
method_id INTEGER, -- fk:methods.id
ref_field_id INTEGER NOT NULL, -- fk:fields.id
opcode INTEGER NOT NULL
);
CREATE TABLE %1$smethod_string_refs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
method_id INTEGER, -- fk:methods.id
ref_string_id INTEGER NOT NULL, -- fk:strings.id
opcode INTEGER NOT NULL
);
)___",
prefix);
int next_class_id = 0;
int next_method_id = 0;
int next_field_id = 0;
int next_string_id = 0;
// Dump all dex items
fprintf(fdout, "BEGIN TRANSACTION;\n");
for (auto& store : stores) {
auto store_name = store.get_name();
auto& dexen = store.get_dexen();
apply_deobfuscated_names(dexen, pg_map);
for (size_t dex_idx = 0; dex_idx < dexen.size(); ++dex_idx) {
auto& dex = dexen[dex_idx];
GatheredTypes gtypes(&dex);
auto strings = gtypes.get_cls_order_dexstring_emitlist();
for (auto dexstr : strings) {
int id = next_string_id++;
string_ids[dexstr] = id;
// Escape string before inserting. ' -> ''
std::string esc(dexstr->c_str());
boost::replace_all(esc, "'", "''");
fprintf(fdout,
"INSERT INTO %sstrings VALUES(%d, '%s');\n",
prefix,
id,
esc.c_str());
}
std::string dex_id_str(store_name + "/" + std::to_string(dex_idx));
const char* dex_id = dex_id_str.c_str();
for (const auto& cls : dex) {
int class_id = next_class_id++;
dump_class(fdout, prefix, dex_id, cls, class_id);
class_ids[cls] = class_id;
for (auto field : cls->get_ifields()) {
int field_id = next_field_id++;
field_ids[field] = field_id;
dump_field(fdout, prefix, class_id, field, field_id);
}
for (auto field : cls->get_sfields()) {
int field_id = next_field_id++;
field_ids[field] = field_id;
dump_field(fdout, prefix, class_id, field, field_id);
}
for (const auto& meth : cls->get_dmethods()) {
int meth_id = next_method_id++;
method_ids[meth] = meth_id;
dump_method(fdout, prefix, class_id, meth, meth_id);
}
for (auto& meth : cls->get_vmethods()) {
int meth_id = next_method_id++;
method_ids[meth] = meth_id;
dump_method(fdout, prefix, class_id, meth, meth_id);
}
}
}
}
fprintf(fdout, "END TRANSACTION;\n");
// Dump references
fprintf(fdout, "BEGIN TRANSACTION;\n");
for (auto& store : stores) {
auto& dexen = store.get_dexen();
for (size_t dex_idx = 0; dex_idx < dexen.size(); ++dex_idx) {
auto& dex = dexen[dex_idx];
for (const auto& cls : dex) {
for (const auto& meth : cls->get_dmethods()) {
int meth_id = method_ids[meth];
dump_method_refs(fdout, prefix, meth, meth_id);
}
for (auto& meth : cls->get_vmethods()) {
int meth_id = method_ids[meth];
dump_method_refs(fdout, prefix, meth, meth_id);
}
for (const auto& field : cls->get_sfields()) {
int field_id = field_ids[field];
dump_field_refs(fdout, prefix, field, field_id);
}
for (const auto& field : cls->get_ifields()) {
int field_id = field_ids[field];
dump_field_refs(fdout, prefix, field, field_id);
}
}
}
}
fprintf(fdout, "END TRANSACTION;\n");
// Dump hierarchy
auto scope = build_class_scope(stores);
ClassHierarchy ch = build_type_hierarchy(scope);
int next_is_a_id = 0;
fprintf(fdout, "BEGIN TRANSACTION;\n");
for (auto& cls : scope) {
TypeSet results;
get_all_children_or_implementors(ch, scope, cls, results);
for (auto type : results) {
auto type_cls = type_class(type);
if (type_cls) {
fprintf(fdout,
"INSERT INTO %sis_a VALUES(%d, %d, %d);\n",
prefix,
next_is_a_id++,
class_ids[type_cls],
class_ids[cls]);
}
}
}
fprintf(fdout, "END TRANSACTION;\n");
}
class DexSqlDump : public Tool {
public:
DexSqlDump()
: Tool("dex-sql-dump", "dump an apk to a sql insertion script") {}
void add_options(po::options_description& options) const override {
add_standard_options(options);
options.add_options()(
"proguard-map,p",
po::value<std::string>()->value_name("redex-rename-map.txt"),
"path to a rename map")(
"output,o",
po::value<std::string>()->value_name("dex.sql"),
"path to output sql dump file (defaults to "
"stdout)")("table-prefix,t",
po::value<std::string>()->value_name("pre_"),
"prefix to use on all table names");
}
void run(const po::variables_map& options) override {
auto stores = init(options["jars"].as<std::string>(),
options["apkdir"].as<std::string>(),
options["dexendir"].as<std::string>());
ProguardMap pgmap(options.count("proguard-map")
? options["proguard-map"].as<std::string>()
: "/dev/null");
const std::string& filename = options["output"].as<std::string>();
FILE* fdout =
options.count("output") ? fopen(filename.c_str(), "w") : stdout;
std::string prefix = options.count("table-prefix")
? options["table-prefix"].as<std::string>()
: "";
if (!fdout) {
fprintf(stderr,
"Could not open %s for writing; terminating\n",
filename.c_str());
exit(EXIT_FAILURE);
}
auto* pfx_cstr = prefix.c_str();
dump_sql(fdout, stores, pgmap, pfx_cstr);
fclose(fdout);
}
};
static DexSqlDump s_tool;
} // namespace