libredex/ProguardMap.cpp (575 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "ProguardMap.h"
#include "DexPosition.h"
#include "DexUtil.h"
#include "IRCode.h"
#include "Show.h"
#include "Timer.h"
#include "Trace.h"
#include "WorkQueue.h"
namespace {
/**
 * Look `key` up in `map`. Returns the mapped value when the key is present,
 * and a copy of `key` itself otherwise.
 */
std::string find_or_same(
    const std::string& key,
    const std::unordered_map<std::string, std::string>& map) {
  const auto entry = map.find(key);
  return entry != map.end() ? entry->second : key;
}
/**
 * Convert a non-array type name to its internal form. The Java primitive
 * names (and "void") are mapped through a fixed table to their one-letter
 * codes; every other name is handed to java_names::external_to_internal.
 */
std::string convert_scalar_type(const std::string& type) {
  static const std::unordered_map<std::string, std::string> primitives = {
      {"void", "V"},  {"boolean", "Z"}, {"byte", "B"},
      {"short", "S"}, {"char", "C"},    {"int", "I"},
      {"long", "J"},  {"float", "F"},   {"double", "D"}};
  const auto prim = primitives.find(type);
  if (prim == primitives.end()) {
    // Not a primitive: treat it as a class name.
    return java_names::external_to_internal(type);
  }
  return prim->second;
}
/**
 * std::string_view flavour of convert_scalar_type: primitive names (and
 * "void") are mapped through a fixed table to their one-letter codes; any
 * other name is handed to java_names::external_to_internal.
 */
std::string convert_scalar_type(std::string_view type) {
  static const std::unordered_map<std::string_view, std::string> primitives = {
      {"void", "V"},  {"boolean", "Z"}, {"byte", "B"},
      {"short", "S"}, {"char", "C"},    {"int", "I"},
      {"long", "J"},  {"float", "F"},   {"double", "D"}};
  const auto prim = primitives.find(type);
  if (prim == primitives.end()) {
    // Not a primitive: treat it as a class name.
    return java_names::external_to_internal(type);
  }
  return prim->second;
}
/**
 * Build a field key of the form "<cls>.<name>" and, when `type` is not
 * empty, append ":<type>".
 */
std::string convert_field(const std::string& cls,
                          const std::string& type,
                          const std::string& name) {
  std::string result = cls;
  result += ".";
  result += name;
  if (!type.empty()) {
    result += ":";
    result += type;
  }
  return result;
}
/**
 * Build a method key of the form "<cls>.<methodname>:(<args>)<rtype>".
 * `rtype` may be empty, in which case the key ends right after ")".
 */
std::string convert_method(const std::string& cls,
                           const std::string& rtype,
                           const std::string& methodname,
                           const std::string& args) {
  std::string result = cls;
  result += ".";
  result += methodname;
  result += ":(";
  result += args;
  result += ")";
  result += rtype;
  return result;
}
/**
 * Translate a type through `pm`: the leading run of '[' characters is kept
 * verbatim and the remainder is passed to pm.translate_class().
 */
std::string translate_type(const std::string& type, const ProguardMap& pm) {
  const auto element_pos = type.find_first_not_of('[');
  std::string result = type.substr(0, element_pos);
  const auto element_type = type.substr(element_pos);
  result += pm.translate_class(element_type);
  return result;
}
/**
 * Advance `p` past any leading whitespace (as classified by isspace). Stops
 * at the terminating NUL, which is not whitespace.
 */
void whitespace(const char*& p) {
  // isspace() has undefined behavior for negative arguments other than EOF,
  // and plain `char` may be signed; bytes >= 0x80 (e.g. from MUTF-8 names)
  // must therefore go through unsigned char first.
  while (isspace(static_cast<unsigned char>(*p))) {
    ++p;
  }
}
/**
 * Parse a base-10 number at `p` with strtol and move `p` to the first
 * character that was not consumed. When no number is present, strtol yields
 * 0 and `p` is left where it was.
 */
uint32_t line_number(const char*& p) {
  char* end = nullptr;
  const long parsed = std::strtol(p, &end, 10);
  p = end;
  return static_cast<uint32_t>(parsed);
}
/**
 * Default identifier delimiter set: NUL, space, ':', ',', newline, '(' and
 * ')'.
 */
bool isseparator(uint32_t cp) {
  switch (cp) {
  case '\0':
  case ' ':
  case ':':
  case ',':
  case '\n':
  case '(':
  case ')':
    return true;
  default:
    return false;
  }
}
/**
 * Parse an identifier starting at `p`.
 *
 * The first code point is always taken as part of the identifier (it is not
 * tested against `is_sep`), except that an ASCII digit there makes the parse
 * fail. Subsequent code points are consumed until `is_sep` accepts one; that
 * code point is left unconsumed, the text before it is stored in `s`, and
 * true is returned.
 *
 * Returns false, without touching `s`, when the input is empty or when the
 * terminating NUL is reached before any code point accepted by `is_sep`.
 * In both cases `p` is never moved past the NUL.
 */
template <typename F>
bool id(const char*& p, std::string& s, F is_sep) {
  const char* const begin = p;
  if (*p == '\0') {
    // Empty input: decoding a code point here would step over the
    // terminator and keep reading past the end of the buffer.
    return false;
  }
  const auto first = mutf8_next_code_point(p);
  // Compare against the ASCII range directly: isdigit() is undefined for
  // values that do not fit in unsigned char, and `first` is a full code
  // point.
  if (first >= '0' && first <= '9') return false;
  while (true) {
    const auto prev = p;
    const auto cp = mutf8_next_code_point(p);
    if (is_sep(cp)) {
      p = prev;
      s = std::string(begin, p - begin);
      return true;
    }
    if (cp == '\0') {
      // End of input without the expected delimiter (only possible for
      // predicates that do not accept NUL). Stop instead of scanning past
      // the buffer.
      p = prev;
      return false;
    }
  }
}
bool id(const char*& p, std::string& s) { return id(p, s, isseparator); }
/**
 * Match the NUL-terminated string `s` at `p`. On a match `p` is advanced
 * past it and true is returned. On a mismatch `p` is left untouched (the
 * same contract as the single-character overload), so the cursor can never
 * be pushed beyond the end of the input by a failed match.
 */
bool literal(const char*& p, const char* s) {
  const auto len = strlen(s);
  if (strncmp(p, s, len) != 0) {
    return false;
  }
  p += len;
  return true;
}
/**
 * Match the single character `s` at `p`. `p` is advanced only when the
 * character matches.
 */
bool literal(const char*& p, char s) {
  const bool matched = (*p == s);
  if (matched) {
    ++p;
  }
  return matched;
}
/**
 * Parse a field reference of the shape "<class>;.<name>:<type>" at `p`,
 * where <type> runs up to a space, newline or NUL. On success the reference
 * is re-assembled into `s` and true is returned.
 */
bool field_full_format(const char*& p, std::string& s) {
  const auto until_semicolon = [](uint32_t cp) { return cp == ';'; };
  const auto until_colon = [](uint32_t cp) { return cp == ':'; };
  const auto until_end = [](uint32_t cp) {
    return cp == ' ' || cp == '\n' || cp == '\0';
  };

  std::string owner;
  std::string member;
  std::string descriptor;
  // Short-circuiting keeps the original parse order and stops at the first
  // piece that fails.
  const bool parsed = id(p, owner, until_semicolon) && literal(p, ";.") &&
                      id(p, member, until_colon) && literal(p, ":") &&
                      id(p, descriptor, until_end);
  if (!parsed) {
    return false;
  }
  s = owner + ";." + member + ":" + descriptor;
  return true;
}
/**
 * Parse a method reference of the shape "<class>;.<name>:(<args>)<rtype>" at
 * `p`, where <args> may be empty and <rtype> runs up to a space, newline or
 * NUL. On success the reference is re-assembled into `s` and true is
 * returned.
 */
bool method_full_format(const char*& p, std::string& s) {
  const auto until_semicolon = [](uint32_t cp) { return cp == ';'; };
  const auto until_colon = [](uint32_t cp) { return cp == ':'; };
  const auto until_rparen = [](uint32_t cp) { return cp == ')'; };
  const auto until_end = [](uint32_t cp) {
    return cp == ' ' || cp == '\n' || cp == '\0';
  };

  std::string owner;
  std::string name;
  std::string args;
  std::string rtype;

  const bool have_prefix = id(p, owner, until_semicolon) &&
                           literal(p, ";.") && id(p, name, until_colon) &&
                           literal(p, ":(");
  if (!have_prefix) {
    return false;
  }

  // "()" means no arguments; otherwise everything up to ')' is the argument
  // list.
  const bool no_args = literal(p, ')');
  if (!no_args) {
    const bool have_args = id(p, args, until_rparen) && literal(p, ')');
    if (!have_args) {
      return false;
    }
  }

  if (!id(p, rtype, until_end)) {
    return false;
  }
  s = owner + ";." + name + ":(" + args + ")" + rtype;
  return true;
}
/** True when the first non-whitespace character of `line` is '#'. */
bool comment(const std::string& line) {
  const char* cursor = line.c_str();
  whitespace(cursor);
  const bool starts_with_hash = literal(cursor, '#');
  return starts_with_hash;
}
/**
 * If `methodname` contains a '.', split it at the last one: the part before
 * the dot is converted with convert_scalar_type() and stored in `classname`,
 * and `methodname` is reduced to the part after the dot. Names without a '.'
 * leave both arguments untouched.
 */
void inlined_method(std::string& classname, std::string& methodname) {
  const std::size_t dot = methodname.find_last_of('.');
  if (dot == std::string::npos) {
    return;
  }
  classname = convert_scalar_type(methodname.substr(0, dot));
  methodname = methodname.substr(dot + 1);
}
/**
 * Proguard generates special member names when a coalesced interface is
 * used.
 * https://sourceforge.net/p/proguard/code/ci/default/tree/core/src/proguard/classfile/editor/ClassReferenceFixer.java#l554
 * Before:
 * com.facebook.imagepipeline.core.ExecutorSupplier mExecutorSupplier;
 * After:
 * a_vcard.android.syncml.pim.VBuilder mExecutorSupplier$7ec36e13 -> b
 *
 * Returns true when `s` ends in '$' followed by exactly 8 hexadecimal
 * digits.
 */
bool is_maybe_proguard_generated_member(const std::string& s) {
  unsigned int hex_digits = 0;
  // Walk backwards over the trailing hex digits; the first other character
  // decides the answer.
  for (auto it = s.rbegin(); it != s.rend(); ++it, ++hex_digits) {
    // isxdigit() is undefined for negative arguments other than EOF, and
    // plain `char` may be signed, so go through unsigned char.
    if (isxdigit(static_cast<unsigned char>(*it))) continue;
    return *it == '$' && hex_digits == 8;
  }
  // Empty string, or nothing but hex digits: no '$' marker.
  return false;
}
} // namespace
/**
 * Load a mapping file. An empty `filename` yields an empty map. Otherwise
 * the file must be openable (asserted), and it is parsed either as the full
 * rename map (`use_new_rename_map` set) or as a Proguard map.
 */
ProguardMap::ProguardMap(const std::string& filename, bool use_new_rename_map) {
  if (!filename.empty()) {
    Timer t("Parsing proguard map");
    std::ifstream fp(filename);
    always_assert_log(fp, "Can't open proguard map: %s\n", filename.c_str());
    if (!use_new_rename_map) {
      parse_proguard_map(fp);
    } else {
      parse_full_map(fp);
    }
  }
}
/** Map `cls` through m_classMap; unknown names are returned unchanged. */
std::string ProguardMap::translate_class(const std::string& cls) const {
  std::string translated = find_or_same(cls, m_classMap);
  return translated;
}
/** Map `field` through m_fieldMap; unknown names are returned unchanged. */
std::string ProguardMap::translate_field(const std::string& field) const {
  std::string translated = find_or_same(field, m_fieldMap);
  return translated;
}
/** Map `method` through m_methodMap; unknown names are returned unchanged. */
std::string ProguardMap::translate_method(const std::string& method) const {
  std::string translated = find_or_same(method, m_methodMap);
  return translated;
}
/** Map `cls` through m_obfClassMap; unknown names are returned unchanged. */
std::string ProguardMap::deobfuscate_class(const std::string& cls) const {
  std::string original = find_or_same(cls, m_obfClassMap);
  return original;
}
/**
 * Map `field` through m_obfFieldMap, then map that result through
 * m_obfUntypedFieldMap. Each step returns its input unchanged when there is
 * no entry.
 */
std::string ProguardMap::deobfuscate_field(const std::string& field) const {
  const std::string typed_result = find_or_same(field, m_obfFieldMap);
  return find_or_same(typed_result, m_obfUntypedFieldMap);
}
/**
 * Map `method` through m_obfMethodMap, then map that result through
 * m_obfUntypedMethodMap. Each step returns its input unchanged when there is
 * no entry.
 */
std::string ProguardMap::deobfuscate_method(const std::string& method) const {
  const std::string typed_result = find_or_same(method, m_obfMethodMap);
  return find_or_same(typed_result, m_obfUntypedMethodMap);
}
/**
 * Expand an obfuscated (method, line) pair into the frames it maps to.
 *
 * Every recorded line range for the method (looked up via
 * pg_impl::lines_key) that matches `line` contributes one frame carrying the
 * range's original method name and a remapped line. When nothing matches, a
 * single frame holding the inputs unchanged is returned, so the result is
 * never empty.
 */
std::vector<ProguardMap::Frame> ProguardMap::deobfuscate_frame(
    const DexString* method_name, uint32_t line) const {
  std::vector<Frame> frames;
  const auto found =
      m_obfMethodLinesMap.find(pg_impl::lines_key(method_name->str()));
  if (found != m_obfMethodLinesMap.end()) {
    for (const auto& range : found->second) {
      if (range->matches(line)) {
        auto mapped_line = line;
        if (range->remaps_to_single_line()) {
          mapped_line = range->original_start;
        } else if (range->remaps_to_range()) {
          // Keep the offset within the range.
          mapped_line = range->original_start + line - range->start;
        }
        frames.emplace_back(DexString::make_string(range->original_name),
                            mapped_line);
      }
    }
  }
  if (!frames.empty()) {
    return frames;
  }
  return {Frame(method_name, line)};
}
/**
 * Return the line ranges recorded for `obfuscated_method`. The lookup uses
 * at(), so a method with no recorded ranges raises std::out_of_range.
 */
ProguardLineRangeVector& ProguardMap::method_lines(
    const std::string& obfuscated_method) {
  const auto key = pg_impl::lines_key(obfuscated_method);
  return m_obfMethodLinesMap.at(key);
}
/**
 * Parse a Proguard map in two passes over `fp`. The first pass only feeds
 * every line to parse_class(). After rewinding, the second pass tries each
 * line as a class, field, method, or comment, in that order, and aborts on a
 * line that is none of them. The stream must be seekable.
 */
void ProguardMap::parse_proguard_map(std::istream& fp) {
  std::string line;

  // Pass 1: class lines only.
  while (std::getline(fp, line)) {
    parse_class(line);
  }

  // Rewind for the second pass.
  fp.clear();
  fp.seekg(0);
  assert_log(!fp.fail(), "Can't use ProguardMap with non-seekable stream");

  // Pass 2: everything.
  while (std::getline(fp, line)) {
    const bool recognized = parse_class(line) || parse_field(line) ||
                            parse_method(line) || comment(line);
    if (!recognized) {
      not_reached_log("Bogus line encountered in proguard map: %s\n",
                      line.c_str());
    }
  }
}
/**
 * Parse the full rename map from `fp`. Each line is tried as a class, store,
 * field, method, or comment line, in that order; a line matching none of
 * them aborts. The stream must be seekable.
 */
void ProguardMap::parse_full_map(std::istream& fp) {
  std::string line;
  fp.seekg(0);
  assert_log(!fp.fail(),
             "Can't use the full rename map with non-seekable stream");
  while (std::getline(fp, line)) {
    const bool recognized =
        parse_class_full_format(line) || parse_store_full_format(line) ||
        parse_field_full_format(line) || parse_method_full_format(line) ||
        comment(line);
    if (!recognized) {
      not_reached_log("Bogus line encountered in the full map: %s\n",
                      line.c_str());
    }
  }
}
bool ProguardMap::parse_class_full_format(const std::string& line) {
std::string old_class_name;
std::string new_class_name;
auto p = line.c_str();
if (!literal(p, "type ")) return false;
if (!id(p, old_class_name)) return false;
if (!literal(p, " -> ")) return false;
if (!id(p, new_class_name)) return false;
m_currClass = old_class_name;
m_currNewClass = new_class_name;
m_classMap[m_currClass] = m_currNewClass;
m_obfClassMap[m_currNewClass] = m_currClass;
return true;
}
/**
 * Recognize a store line of the full rename map. Nothing is recorded; the
 * return value only says whether the line carries the store prefix.
 */
bool ProguardMap::parse_store_full_format(const std::string& line) {
  const char* cursor = line.c_str();
  // NOTE(review): the prefix contains a backtick ("store` "), unlike the
  // other line prefixes ("type ", "ifield ", ...). This looks like a typo,
  // but the writer of the file format is not visible here -- confirm before
  // changing it.
  //
  // We don't care about the contents of stores here yet.
  return literal(cursor, "store` ");
}
/**
 * Parse an "ifield <old> -> <new>" or "sfield <old> -> <new>" line, where
 * both sides are full field references. On success the pair is recorded in
 * the forward and reverse field maps.
 */
bool ProguardMap::parse_field_full_format(const std::string& line) {
  const char* cursor = line.c_str();
  if (!literal(cursor, "ifield ")) {
    // Start over from the beginning of the line for the second prefix.
    cursor = line.c_str();
    if (!literal(cursor, "sfield ")) {
      return false;
    }
  }

  std::string original;
  std::string renamed;
  const bool parsed = field_full_format(cursor, original) &&
                      literal(cursor, " -> ") &&
                      field_full_format(cursor, renamed);
  if (!parsed) {
    return false;
  }
  m_fieldMap[original] = renamed;
  m_obfFieldMap[renamed] = original;
  return true;
}
/**
 * Parse a "dmethod <old> -> <new>" or "vmethod <old> -> <new>" line, where
 * both sides are full method references. On success the pair is recorded in
 * the forward and reverse method maps.
 */
bool ProguardMap::parse_method_full_format(const std::string& line) {
  const char* cursor = line.c_str();
  if (!literal(cursor, "dmethod ")) {
    // Start over from the beginning of the line for the second prefix.
    cursor = line.c_str();
    if (!literal(cursor, "vmethod ")) {
      return false;
    }
  }

  std::string original;
  std::string renamed;
  const bool parsed = method_full_format(cursor, original) &&
                      literal(cursor, " -> ") &&
                      method_full_format(cursor, renamed);
  if (!parsed) {
    return false;
  }
  m_methodMap[original] = renamed;
  m_obfMethodMap[renamed] = original;
  return true;
}
bool ProguardMap::parse_class(const std::string& line) {
std::string classname;
std::string newname;
auto p = line.c_str();
if (!id(p, classname)) return false;
if (!literal(p, " -> ")) return false;
if (!id(p, newname)) return false;
m_currClass = convert_type(classname);
m_currNewClass = convert_type(newname);
m_classMap[m_currClass] = m_currNewClass;
m_obfClassMap[m_currNewClass] = m_currClass;
return true;
}
bool ProguardMap::parse_field(const std::string& line) {
std::string type;
std::string fieldname;
std::string newname;
auto p = line.c_str();
whitespace(p);
if (!id(p, type)) return false;
whitespace(p);
if (!id(p, fieldname)) return false;
if (!literal(p, " -> ")) return false;
if (!id(p, newname)) return false;
auto ctype = convert_type(type);
auto xtype = translate_type(ctype, *this);
auto pgnew = convert_field(m_currNewClass, xtype, newname);
auto pgnew_notype = convert_field(m_currNewClass, "", newname);
auto pgold = convert_field(m_currClass, ctype, fieldname);
// Record interfaces that are coalesced by Proguard.
if (ctype[0] == 'L' && is_maybe_proguard_generated_member(fieldname)) {
fprintf(stderr,
"Type '%s' is touched by Proguard in '%s'\n",
ctype.c_str(),
pgold.c_str());
m_pg_coalesced_interfaces.insert(ctype);
}
m_fieldMap[pgold] = pgnew;
m_obfFieldMap[pgnew] = pgold;
m_obfUntypedFieldMap[pgnew_notype] = pgold;
return true;
}
/**
 * Parse a method line belonging to the current class. The shape handled
 * here is
 *
 *   [start:end:]rtype [class.]name(arg,...)[:orig_start[:orig_end]] -> newname
 *
 * where the bracketed line-number parts are optional (absent numbers are
 * read as 0). A dotted method name is split by inlined_method(), so the
 * original key uses the class named there rather than the current class.
 *
 * On success the method is recorded in the forward map, the reverse map, the
 * reverse map keyed without return type, and its line range is appended to
 * the per-method line table. Returns false, recording nothing, when the line
 * does not have this shape -- including a malformed argument list or a
 * missing " -> ".
 */
bool ProguardMap::parse_method(const std::string& line) {
  std::string type;
  std::string methodname;
  std::string classname = m_currClass;
  std::string old_args;
  std::string new_args;
  std::string newname;
  auto lines = std::make_unique<ProguardLineRange>();
  auto p = line.c_str();
  whitespace(p);
  // Optional "start:end:" prefix; each piece is skipped when absent.
  lines->start = line_number(p);
  literal(p, ':');
  lines->end = line_number(p);
  literal(p, ':');
  if (!id(p, type)) return false;
  whitespace(p);
  if (!id(p, methodname)) return false;
  inlined_method(classname, methodname);
  if (!literal(p, '(')) return false;
  while (!literal(p, ')')) {
    std::string arg;
    // A failed argument parse means the line is not a method line. Bail out
    // rather than keep scanning for ')' (possibly beyond the end of the
    // line) and record a bogus mapping.
    if (!id(p, arg)) return false;
    auto old_arg = convert_type(arg);
    auto new_arg = translate_type(old_arg, *this);
    old_args += old_arg;
    new_args += new_arg;
    literal(p, ',');
  }
  // Optional ":orig_start:orig_end" suffix.
  literal(p, ':');
  lines->original_start = line_number(p);
  literal(p, ':');
  lines->original_end = line_number(p);
  // The arrow is mandatory; without it there is no new name to read.
  if (!literal(p, " -> ")) return false;
  if (!id(p, newname)) return false;
  auto old_rtype = convert_type(type);
  auto new_rtype = translate_type(old_rtype, *this);
  auto pgold = convert_method(classname, old_rtype, methodname, old_args);
  auto pgnew = convert_method(m_currNewClass, new_rtype, newname, new_args);
  auto pgnew_no_rtype = convert_method(m_currNewClass, "", newname, new_args);
  m_methodMap[pgold] = pgnew;
  m_obfMethodMap[pgnew] = pgold;
  m_obfUntypedMethodMap[pgnew_no_rtype] = pgold;
  lines->original_name = pgold;
  m_obfMethodLinesMap[pg_impl::lines_key(pgnew)].push_back(std::move(lines));
  return true;
}
namespace pg_impl {
/*
 * Given a string "Lcom/foo/Bar;.a:()I", return "Bar.java". If we have a method
 * called on an inner class like "Baz$Inner", use just the outer class for the
 * source file name -- in this case we would return "Baz.java".
 *
 * The string must contain ";." (asserted). Only a '$' inside the simple class
 * name is treated as an inner-class marker; a '$' in the package path is
 * ignored. The name is cut at the last '$' of the simple class name.
 */
const DexString* file_name_from_method_string(const DexString* method) {
  const auto& s = method->str();
  auto end = s.rfind(";.");
  // Check before `end` is used as a search bound: with npos the searches
  // below would scan the whole string, and a stray '$' could hide the
  // missing ";.".
  always_assert(end != std::string::npos);
  auto start = s.rfind('/', end);
  if (start != std::string::npos) {
    ++start; // Skip over the "/"
  } else {
    start = 1; // Skip over the "L"
  }
  auto innercls_pos = s.rfind('$', end);
  if (innercls_pos != std::string::npos && innercls_pos >= start) {
    end = innercls_pos;
  }
  return DexString::make_string(s.substr(start, end - start) + ".java");
}
/**
 * Apply position deobfuscation to `method`'s code. Methods without code are
 * skipped.
 */
static void apply_deobfuscated_positions(DexMethod* method,
                                         const ProguardMap& pm) {
  if (auto* code = method->get_code()) {
    apply_deobfuscated_positions(code, pm);
  }
}
/**
 * Rewrite every position entry in `code` using pm.deobfuscate_frame(). The
 * first frame replaces the entry in place; each further frame becomes a new
 * DexPosition that is inserted ahead of the entry and chained through
 * `parent`.
 */
void apply_deobfuscated_positions(IRCode* code, const ProguardMap& pm) {
  for (auto& mie : *code) {
    if (mie.type != MFLOW_POSITION) {
      continue;
    }
    auto* pos = mie.pos.get();
    const auto& frames = pm.deobfuscate_frame(pos->method, pos->line);
    auto frame = frames.begin();

    // Only touch pos->file when the method or line actually changed:
    // file_name_from_method_string() is just a best guess at the real file
    // name.
    const bool unchanged =
        pos->method == frame->method && pos->line == frame->line;
    if (!unchanged) {
      pos->method = frame->method;
      pos->file = file_name_from_method_string(frame->method);
      pos->line = frame->line;
    }

    // An inlined instruction maps to several frames. Build the call chain as
    // a linked list of DexPositions, one per remaining frame.
    auto insert_it = code->iterator_to(mie);
    for (++frame; frame != frames.end(); ++frame) {
      auto parent_pos = std::make_unique<DexPosition>(
          frame->method,
          file_name_from_method_string(frame->method),
          frame->line);
      pos->parent = parent_pos.get();
      pos = parent_pos.get();
      insert_it = code->insert_before(insert_it, std::move(parent_pos));
    }
  }
}
/**
 * method_name should be a method as returned from convert_method. The key is
 * everything before the last ':' in it; a name without ':' trips the
 * assertion.
 */
std::string lines_key(const std::string& method_name) {
  const std::size_t last_colon = method_name.rfind(':');
  always_assert(last_colon != std::string::npos);
  return std::string(method_name, 0, last_colon);
}
} // namespace pg_impl
void apply_deobfuscated_names(const std::vector<DexClasses>& dexen,
const ProguardMap& pm) {
std::function<void(DexClass*)> worker_empty_pg_map = [&](DexClass* cls) {
cls->set_deobfuscated_name(show(cls));
for (const auto& m : cls->get_dmethods()) {
m->set_deobfuscated_name(show(m));
}
for (const auto& m : cls->get_vmethods()) {
m->set_deobfuscated_name(show(m));
}
for (const auto& f : cls->get_ifields()) {
f->set_deobfuscated_name(show(f));
}
for (const auto& f : cls->get_sfields()) {
f->set_deobfuscated_name(show(f));
}
};
std::function<void(DexClass*)> worker_pg_map = [&](DexClass* cls) {
TRACE(PGR, 4, "deob cls %s %s", SHOW(cls),
pm.deobfuscate_class(show(cls)).c_str());
cls->set_deobfuscated_name(pm.deobfuscate_class(show(cls)));
for (const auto& m : cls->get_dmethods()) {
TRACE(PGR, 4, "deob dmeth %s %s", SHOW(m),
pm.deobfuscate_method(show(m)).c_str());
m->set_deobfuscated_name(pm.deobfuscate_method(show(m)));
pg_impl::apply_deobfuscated_positions(m, pm);
}
for (const auto& m : cls->get_vmethods()) {
TRACE(PM, 4, "deob vmeth %s %s", SHOW(m),
pm.deobfuscate_method(show(m)).c_str());
m->set_deobfuscated_name(pm.deobfuscate_method(show(m)));
pg_impl::apply_deobfuscated_positions(m, pm);
}
for (const auto& f : cls->get_ifields()) {
TRACE(PM, 4, "deob ifield %s %s", SHOW(f),
pm.deobfuscate_field(show(f)).c_str());
f->set_deobfuscated_name(pm.deobfuscate_field(show(f)));
}
for (const auto& f : cls->get_sfields()) {
TRACE(PM, 4, "deob sfield %s %s", SHOW(f),
pm.deobfuscate_field(show(f)).c_str());
f->set_deobfuscated_name(pm.deobfuscate_field(show(f)));
}
};
auto wq = workqueue_foreach<DexClass*>(pm.empty() ? worker_empty_pg_map
: worker_pg_map);
for (const auto& dex : dexen) {
for (const auto& cls : dex) {
wq.add_item(cls);
}
}
wq.run_all();
}
std::string convert_type(const std::string& type) {
return convert_type(std::string_view(type));
}
/**
 * Convert a type name that may carry array brackets. Without a '[' the name
 * goes straight to convert_scalar_type(). Otherwise the result is one '['
 * per '[' found from the first bracket onward, followed by the converted
 * text that precedes the first bracket (e.g. the "int" of "int[][]").
 */
std::string convert_type(std::string_view type) {
  const auto first_bracket = type.find('[');
  if (first_bracket == std::string::npos) {
    return convert_scalar_type(type);
  }
  const auto ndims = std::count(type.begin() + first_bracket, type.end(), '[');
  std::string res(ndims, '[');
  const auto element = type.substr(0, first_bracket);
  return res + convert_scalar_type(element);
}