libredex/RedexContext.cpp (512 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "RedexContext.h"
#include <exception>
#include <mutex>
#include <regex>
#include <sstream>
#include <unordered_set>
#include "Debug.h"
#include "DexCallSite.h"
#include "DexClass.h"
#include "DexPosition.h"
#include "DuplicateClasses.h"
#include "KeepReason.h"
#include "ProguardConfiguration.h"
#include "Show.h"
#include "Timer.h"
#include "Trace.h"
#include "WorkQueue.h"
static_assert(std::is_same<DexTypeList::ContainerType,
RedexContext::DexTypeListContainerType>::value);
RedexContext* g_redex;
RedexContext::RedexContext(bool allow_class_duplicates)
: m_allow_class_duplicates(allow_class_duplicates) {}
RedexContext::~RedexContext() {
std::vector<std::function<void()>> fns{
[&] {
Timer timer("Delete DexTypes", /* indent */ false);
// NB: This table intentionally contains aliases (multiple
// DexStrings map to the same DexType), so we have to dedup the set of
// types before deleting to avoid double-frees.
std::unordered_set<DexType*> delete_types;
for (auto const& p : s_type_map) {
if (delete_types.emplace(p.second).second) {
delete p.second;
}
}
s_type_map.clear();
},
[&] {
Timer timer("DexTypeLists", /* indent */ false);
for (auto const& p : s_typelist_map) {
delete p.second;
}
s_typelist_map.clear();
},
[&] {
Timer timer("Delete DexProtos.", /* indent */ false);
for (auto const& p : s_proto_map) {
delete p.second;
}
s_proto_map.clear();
},
[&] {
Timer timer("Delete DexClasses", /* indent */ false);
for (auto const& p : m_type_to_class) {
delete p.second;
}
m_type_to_class.clear();
},
[&] {
Timer timer("Delete DexLocations", /* indent */ false);
for (auto const& p : s_location_map) {
delete p.second;
}
s_location_map.clear();
},
[&] {
Timer timer("release_keep_reasons", /* indent */ false);
keep_reason::Reason::release_keep_reasons();
},
[&] {
Timer timer("m_destruction_tasks", /* indent */ false);
for (const Task& t : m_destruction_tasks) {
t();
}
m_destruction_tasks.clear();
},
[&] {
Timer timer("delete m_position_pattern_switch_manager",
/* indent */ false);
delete m_position_pattern_switch_manager;
},
[&] {
Timer timer("misc", /* indent */ false);
m_external_classes.clear();
field_values.clear();
method_return_values.clear();
}};
// Deleting fields and methods is especially expensive, so we do it by
// "buckets".
const size_t method_buckets_count = 16;
for (size_t bucket = 0; bucket < method_buckets_count; bucket++) {
fns.push_back([bucket, this]() {
Timer timer("Delete DexMethods/" + std::to_string(bucket),
/* indent */ false);
// Delete DexMethods. Use set to prevent double freeing aliases
std::unordered_set<DexMethod*> delete_methods;
for (auto const& it : s_method_map) {
auto method = static_cast<DexMethod*>(it.second);
if ((reinterpret_cast<size_t>(method) >> 16) % method_buckets_count ==
bucket &&
delete_methods.emplace(method).second) {
delete method;
}
}
});
}
const size_t field_buckets_count = 4;
for (size_t bucket = 0; bucket < field_buckets_count; bucket++) {
fns.push_back([bucket, this]() {
Timer timer("Delete DexFields/" + std::to_string(bucket),
/* indent */ false);
// Delete DexFields. Use set to prevent double freeing aliases
std::unordered_set<DexField*> delete_fields;
for (auto const& it : s_field_map) {
auto field = static_cast<DexField*>(it.second);
if ((reinterpret_cast<size_t>(field) >> 16) % field_buckets_count ==
bucket &&
delete_fields.emplace(field).second) {
delete field;
}
}
});
}
size_t segment_index{0};
for (auto& segment : s_string_map) {
fns.push_back([&segment, index = segment_index++]() {
Timer timer("Delete DexStrings segment/" + std::to_string(index),
/* indent */ false);
for (auto const& p : segment) {
delete p.second;
}
segment.clear();
});
}
workqueue_run<std::function<void()>>([](std::function<void()>& fn) { fn(); },
fns);
s_method_map.clear();
}
/*
* Try and insert (:key, :value) into :container. This insertion may fail if
* another thread has already inserted that key. In that case, return the
* existing value and discard the one we were trying to insert.
*
* We distinguish between the types of the inserted and stored values to handle
* DexFields and DexMethods, where we upcast the inserted value into a
* DexFieldRef / DexMethodRef respectively when storing it.
*/
template <class InsertValue,
class StoredValue = InsertValue,
class Deleter = std::default_delete<InsertValue>,
class Key,
class Container>
static StoredValue* try_insert(Key key,
InsertValue* value,
Container* container) {
std::unique_ptr<InsertValue, Deleter> to_insert(value);
if (container->emplace(key, to_insert.get())) {
return to_insert.release();
}
return container->at(key);
}
const DexString* RedexContext::make_string(std::string_view str) {
auto& segment = s_string_map.at(str);
auto rv = segment.get(str, nullptr);
if (rv != nullptr) {
return rv;
}
// Note that DexStrings are keyed by the string_view of the underlying
// std::string. The string_view is valid until a the string is destroyed, or
// until a non-const function is called on the string (but note the
// std::string itself is const)
auto dexstring = new DexString(std::string(str));
auto p2 = std::string_view(dexstring->c_str(), str.size());
return try_insert<DexString, const DexString>(p2, dexstring, &segment);
}
const DexString* RedexContext::get_string(std::string_view str) {
auto& segment = s_string_map.at(str);
return segment.get(str, nullptr);
}
DexType* RedexContext::make_type(const DexString* dstring) {
always_assert(dstring != nullptr);
auto rv = s_type_map.get(dstring, nullptr);
if (rv != nullptr) {
return rv;
}
return try_insert(dstring, new DexType(dstring), &s_type_map);
}
DexType* RedexContext::get_type(const DexString* dstring) {
if (dstring == nullptr) {
return nullptr;
}
return s_type_map.get(dstring, nullptr);
}
void RedexContext::set_type_name(DexType* type, const DexString* new_name) {
alias_type_name(type, new_name);
type->m_name = new_name;
}
void RedexContext::alias_type_name(DexType* type, const DexString* new_name) {
always_assert_log(
!s_type_map.count(new_name),
"Bailing, attempting to alias a symbol that already exists! '%s'\n",
new_name->c_str());
s_type_map.emplace(new_name, type);
}
void RedexContext::remove_type_name(const DexString* name) {
s_type_map.erase(name);
}
DexFieldRef* RedexContext::make_field(const DexType* container,
const DexString* name,
const DexType* type) {
always_assert(container != nullptr && name != nullptr && type != nullptr);
DexFieldSpec r(const_cast<DexType*>(container), name,
const_cast<DexType*>(type));
auto rv = s_field_map.get(r, nullptr);
if (rv != nullptr) {
return rv;
}
auto field = new DexField(const_cast<DexType*>(container), name,
const_cast<DexType*>(type));
return try_insert<DexField, DexFieldRef>(r, field, &s_field_map);
}
DexFieldRef* RedexContext::get_field(const DexType* container,
const DexString* name,
const DexType* type) {
if (container == nullptr || name == nullptr || type == nullptr) {
return nullptr;
}
DexFieldSpec r(const_cast<DexType*>(container), name,
const_cast<DexType*>(type));
return s_field_map.get(r, nullptr);
}
void RedexContext::alias_field_name(DexFieldRef* field,
const DexString* new_name) {
DexFieldSpec r(field->m_spec.cls, new_name, field->m_spec.type);
always_assert_log(
!s_field_map.count(r),
"Bailing, attempting to alias a symbol that already exists! '%s'\n",
new_name->c_str());
s_field_map.emplace(r, field);
}
void RedexContext::erase_field(DexFieldRef* field) {
s_field_map.erase(field->m_spec);
}
void RedexContext::erase_field(const DexType* container,
const DexString* name,
const DexType* type) {
DexFieldSpec r(const_cast<DexType*>(container), name,
const_cast<DexType*>(type));
s_field_map.erase(r);
}
void RedexContext::mutate_field(DexFieldRef* field,
const DexFieldSpec& ref,
bool rename_on_collision) {
std::lock_guard<std::mutex> lock(s_field_lock);
DexFieldSpec& r = field->m_spec;
s_field_map.erase(r);
r.cls = ref.cls != nullptr ? ref.cls : field->m_spec.cls;
r.name = ref.name != nullptr ? ref.name : field->m_spec.name;
r.type = ref.type != nullptr ? ref.type : field->m_spec.type;
field->m_spec = r;
if (rename_on_collision && s_field_map.find(r) != s_field_map.end()) {
uint32_t i = 0;
while (true) {
r.name = DexString::make_string(("f$" + std::to_string(i++)).c_str());
if (s_field_map.find(r) == s_field_map.end()) {
break;
}
}
}
always_assert_log(s_field_map.find(r) == s_field_map.end(),
"Another field with the same signature already exists %s",
SHOW(s_field_map.at(r)));
s_field_map.emplace(r, field);
}
DexTypeList* RedexContext::make_type_list(
RedexContext::DexTypeListContainerType&& p) {
auto rv = s_typelist_map.get(&p, nullptr);
if (rv != nullptr) {
return rv;
}
auto typelist = new DexTypeList(std::move(p));
return try_insert(&typelist->m_list, typelist, &s_typelist_map);
}
DexTypeList* RedexContext::get_type_list(
const RedexContext::DexTypeListContainerType& p) {
return s_typelist_map.get(&p, nullptr);
}
DexProto* RedexContext::make_proto(const DexType* rtype,
const DexTypeList* args,
const DexString* shorty) {
always_assert(rtype != nullptr && args != nullptr && shorty != nullptr);
ProtoKey key(rtype, args);
auto rv = s_proto_map.get(key, nullptr);
if (rv != nullptr) {
return rv;
}
return try_insert(key,
new DexProto(const_cast<DexType*>(rtype),
const_cast<DexTypeList*>(args),
shorty),
&s_proto_map);
}
DexProto* RedexContext::get_proto(const DexType* rtype,
const DexTypeList* args) {
if (rtype == nullptr || args == nullptr) {
return nullptr;
}
return s_proto_map.get(ProtoKey(rtype, args), nullptr);
}
DexMethodRef* RedexContext::make_method(const DexType* type_,
const DexString* name_,
const DexProto* proto_) {
// Ideally, DexMethodSpec would store const types, then these casts wouldn't
// be necessary, but that would involve cleaning up quite a bit of existing
// code.
auto type = const_cast<DexType*>(type_);
auto name = name_;
auto proto = const_cast<DexProto*>(proto_);
always_assert(type != nullptr && name != nullptr && proto != nullptr);
DexMethodSpec r(type, name, proto);
auto rv = s_method_map.get(r, nullptr);
if (rv != nullptr) {
return rv;
}
return try_insert<DexMethod, DexMethodRef, DexMethod::Deleter>(
r, new DexMethod(type, name, proto), &s_method_map);
}
DexMethodRef* RedexContext::get_method(const DexType* type,
const DexString* name,
const DexProto* proto) {
if (type == nullptr || name == nullptr || proto == nullptr) {
return nullptr;
}
DexMethodSpec r(const_cast<DexType*>(type), name,
const_cast<DexProto*>(proto));
return s_method_map.get(r, nullptr);
}
void RedexContext::alias_method_name(DexMethodRef* method,
const DexString* new_name) {
DexMethodSpec r(method->m_spec.cls, new_name, method->m_spec.proto);
always_assert_log(
!s_method_map.count(r),
"Bailing, attempting to alias a symbol that already exists! '%s'\n",
new_name->c_str());
s_method_map.emplace(r, method);
}
void RedexContext::erase_method(DexMethodRef* method) {
s_method_map.erase(method->m_spec);
// Also remove the alias from the map
if (method->is_def()) {
if (method->DexMethodRef::as_def()->get_deobfuscated_name_or_null() !=
nullptr) {
DexMethodSpec r(method->m_spec.cls,
&method->DexMethodRef::as_def()->get_deobfuscated_name(),
method->m_spec.proto);
s_method_map.erase(r);
}
}
}
void RedexContext::erase_method(const DexType* type,
const DexString* name,
const DexProto* proto) {
DexMethodSpec r(const_cast<DexType*>(type), name,
const_cast<DexProto*>(proto));
s_method_map.erase(r);
}
// TODO: Need a better interface.
void RedexContext::mutate_method(DexMethodRef* method,
const DexMethodSpec& new_spec,
bool rename_on_collision) {
std::lock_guard<std::mutex> lock(s_method_lock);
DexMethodSpec old_spec = method->m_spec;
s_method_map.erase(method->m_spec);
DexMethodSpec& r = method->m_spec;
r.cls = new_spec.cls != nullptr ? new_spec.cls : method->m_spec.cls;
r.name = new_spec.name != nullptr ? new_spec.name : method->m_spec.name;
r.proto = new_spec.proto != nullptr ? new_spec.proto : method->m_spec.proto;
if (s_method_map.count(r) && rename_on_collision) {
// Never rename constructors, which causes runtime verification error:
// "Method 42(Foo;.$init$$0) is marked constructor, but doesn't match name"
always_assert_log(
show(r.name) != "<init>" && show(r.name) != "<clinit>",
"you should not rename constructor on a collision, %s.%s:%s exists",
SHOW(r.cls), SHOW(r.name), SHOW(r.proto));
if (new_spec.cls == nullptr || new_spec.cls == old_spec.cls) {
// Either method prototype or name is going to be changed, and we hit a
// collision. Make an unique name: "name$[0-9]+". But in case of <clinit>,
// libdex rejects a name like "<clinit>$1". See:
// http://androidxref.com/9.0.0_r3/xref/dalvik/libdex/DexUtf.cpp#115
// Valid characters can be found here: [_a-zA-Z0-9$\-]
// http://androidxref.com/9.0.0_r3/xref/dalvik/libdex/DexUtf.cpp#50
// If a method name begins with "<", it must end with ">". We generate a
// name like "$clinit$$42" by replacing <, > with $.
uint32_t i = 0;
std::string prefix;
if (r.name->str().front() == '<') {
redex_assert(r.name->str().back() == '>');
prefix =
"$" + r.name->str().substr(1, r.name->str().length() - 2) + "$$";
} else {
prefix = r.name->str() + "$";
}
do {
r.name = DexString::make_string((prefix + std::to_string(i++)).c_str());
} while (s_method_map.count(r));
} else {
// We are about to change its class. Use a better name to remember its
// original source class on a collision. Tokenize the class name into
// parts, and use them until no more collison.
//
// "com/facebook/foo/Bar;" => {"com", "facebook", "foo", "Bar"}
std::string cls_name = show_deobfuscated(old_spec.cls);
std::regex separator{"[/;]"};
std::vector<std::string> parts;
std::copy(std::sregex_token_iterator(cls_name.begin(), cls_name.end(),
separator, -1),
std::sregex_token_iterator(),
std::back_inserter(parts));
// Make a name like "name$Bar$foo", or "$clinit$$Bar$foo".
std::stringstream ss;
if (old_spec.name->str().front() == '<') {
ss << "$"
<< old_spec.name->str().substr(1, old_spec.name->str().length() - 2)
<< "$";
} else {
ss << *old_spec.name;
}
for (auto part = parts.rbegin(); part != parts.rend(); ++part) {
ss << "$" << *part;
r.name = DexString::make_string(ss.str());
if (!s_method_map.count(r)) {
break;
}
}
}
}
// We might still miss name collision cases. As of now, let's just assert.
if (s_method_map.count(r)) {
always_assert_log(!s_method_map.count(r),
"Another method of the same signature already exists %s"
" %s %s",
SHOW(r.cls), SHOW(r.name), SHOW(r.proto));
}
s_method_map.emplace(r, method);
}
DexLocation* RedexContext::make_location(std::string_view store_name,
std::string_view file_name) {
auto key = std::make_pair(store_name, file_name);
auto rv = s_location_map.get(key, nullptr);
if (rv != nullptr) {
return rv;
}
auto value = new DexLocation(std::string(store_name), std::string(file_name));
key = std::make_pair(value->get_store_name(), value->get_file_name());
return try_insert(key, value, &s_location_map);
}
DexLocation* RedexContext::get_location(std::string_view store_name,
std::string_view file_name) {
auto key = std::make_pair(store_name, file_name);
return s_location_map.get(key, nullptr);
}
PositionPatternSwitchManager*
RedexContext::get_position_pattern_switch_manager() {
if (!m_position_pattern_switch_manager) {
m_position_pattern_switch_manager = new PositionPatternSwitchManager();
}
return m_position_pattern_switch_manager;
}
// Return false on unique classes
// Return true on benign duplicate classes
// Throw RedexException on problematic duplicate classes
bool RedexContext::class_already_loaded(DexClass* cls) {
std::lock_guard<std::mutex> l(m_type_system_mutex);
const DexType* type = cls->get_type();
const auto& it = m_type_to_class.find(type);
if (it == m_type_to_class.end()) {
return false;
} else {
const auto& prev_loc = it->second->get_location()->get_file_name();
const auto& cur_loc = cls->get_location()->get_file_name();
if (prev_loc == cur_loc || dup_classes::is_known_dup(cls)) {
// benign duplicates
TRACE(MAIN, 1, "Warning: found a duplicate class: %s", SHOW(cls));
} else {
const std::string& class_name = show(cls);
TRACE(MAIN,
1,
"Found a duplicate class: %s in two dexes:\ndex 1: %s\ndex "
"2: %s\n",
class_name.c_str(),
prev_loc.c_str(),
cur_loc.c_str());
if (!m_allow_class_duplicates) {
throw RedexException(
RedexError::DUPLICATE_CLASSES,
"Found duplicate class in two different files.",
{{"class", class_name}, {"dex1", prev_loc}, {"dex2", cur_loc}});
}
}
return true;
}
}
void RedexContext::publish_class(DexClass* cls) {
std::lock_guard<std::mutex> l(m_type_system_mutex);
const DexType* type = cls->get_type();
const auto& pair = m_type_to_class.emplace(type, cls);
bool insertion_took_place = pair.second;
always_assert_log(insertion_took_place,
"No insertion for class: %s with deobfuscated name: %s",
cls->get_name()->c_str(),
cls->get_deobfuscated_name().c_str());
if (cls->is_external()) {
m_external_classes.emplace_back(cls);
}
}
DexClass* RedexContext::type_class(const DexType* t) {
auto it = m_type_to_class.find(t);
return it != m_type_to_class.end() ? it->second : nullptr;
}
void RedexContext::set_field_value(DexField* field,
keep_rules::AssumeReturnValue& val) {
field_values.emplace(field,
std::make_unique<keep_rules::AssumeReturnValue>(val));
}
keep_rules::AssumeReturnValue* RedexContext::get_field_value(DexField* field) {
auto it = field_values.find(field);
if (it != field_values.end()) {
return it->second.get();
}
return nullptr;
}
void RedexContext::unset_field_value(DexField* field) {
field_values.erase(field);
}
void RedexContext::set_return_value(DexMethod* method,
keep_rules::AssumeReturnValue& val) {
method_return_values.emplace(
method, std::make_unique<keep_rules::AssumeReturnValue>(val));
}
keep_rules::AssumeReturnValue* RedexContext::get_return_value(
DexMethod* method) {
auto it = method_return_values.find(method);
if (it != method_return_values.end()) {
return it->second.get();
}
return nullptr;
}
void RedexContext::unset_return_value(DexMethod* method) {
method_return_values.erase(method);
}
void RedexContext::add_destruction_task(const Task& t) {
std::unique_lock<std::mutex> lock{m_destruction_tasks_lock};
m_destruction_tasks.push_back(t);
}
void RedexContext::set_sb_interaction_index(
const std::unordered_map<std::string, size_t>& input) {
m_sb_interaction_indices = input;
}