opt/interdex/InterDex.cpp (1,062 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "InterDex.h"
#include <boost/algorithm/string/predicate.hpp>
#include <algorithm>
#include <cinttypes>
#include <string>
#include <unordered_set>
#include <vector>
#include "CppUtil.h"
#include "Creators.h"
#include "Debug.h"
#include "DexAsm.h"
#include "DexClass.h"
#include "DexLoader.h"
#include "DexOutput.h"
#include "DexUtil.h"
#include "IRCode.h"
#include "IRInstruction.h"
#include "InterDexPassPlugin.h"
#include "MethodProfiles.h"
#include "ReachableClasses.h"
#include "Show.h"
#include "StlUtil.h"
#include "StringUtil.h"
#include "Walkers.h"
#include "file-utils.h"
namespace {
// Name prefix shared by all secondary-dex canary classes; used by is_canary().
constexpr const char* SECONDARY_CANARY_PREFIX = "Lsecondary/dex";
// printf-style patterns for generating canary class names. The BUFSIZE
// constants are derived from the format-string lengths; get_canary_name's
// snprintf truncates in the (unexpected) case an expansion exceeds them.
constexpr const char SECONDARY_CANARY_CLASS_FORMAT[] =
    "Lsecondary/dex%02d/Canary;";
constexpr size_t SECONDARY_CANARY_CLASS_BUFSIZE =
    sizeof(SECONDARY_CANARY_CLASS_FORMAT);
constexpr const char STORE_CANARY_CLASS_FORMAT[] = "Lstore%04x/dex%02d/Canary;";
constexpr size_t STORE_CANARY_CLASS_BUFSIZE = sizeof(STORE_CANARY_CLASS_FORMAT);
// Name prefixes of the synthetic marker classes recognized while walking the
// interdex (betamap) class list: dex-end, scroll-set and background-set
// boundaries.
constexpr const char* END_MARKER_FORMAT = "LDexEndMarker";
constexpr const char* SCROLL_SET_START_FORMAT = "LScrollSetStart";
constexpr const char* SCROLL_SET_END_FORMAT = "LScrollSetEnd";
constexpr const char* BG_SET_START_FORMAT = "LBackgroundSetStart";
constexpr const char* BG_SET_END_FORMAT = "LBackgroundSetEnd";
// Default DexInfo (all flags off) used for non-root stores, where no
// coldstart/scroll/background classification applies.
static interdex::DexInfo EMPTY_DEX_INFO;
std::string get_canary_name(int dexnum, const DexString* store_name) {
if (store_name) {
char buf[STORE_CANARY_CLASS_BUFSIZE];
int store_id = store_name->java_hashcode() & 0xFFFF;
// Yes, there could be collisions. We assume that is handled outside of
// Redex.
snprintf(buf, sizeof(buf), STORE_CANARY_CLASS_FORMAT, store_id, dexnum + 1);
return std::string(buf);
} else {
char buf[SECONDARY_CANARY_CLASS_BUFSIZE];
snprintf(buf, sizeof(buf), SECONDARY_CANARY_CLASS_FORMAT, dexnum);
return std::string(buf);
}
}
/*
 * Determines which classes of the interdex (coldstart) list are not referenced
 * by any other coldstart class and can therefore be pruned from the set.
 * Runs a fixed-point iteration: dropping a class may render further classes
 * unreferenced, so the scan repeats until no new drops are found.
 * Returns the set of prunable classes; empty when static pruning is disabled.
 */
std::unordered_set<DexClass*> find_unrefenced_coldstart_classes(
    const Scope& scope,
    const std::vector<DexType*>& interdex_types,
    bool static_prune_classes) {
  // Loop sentinels: iterate while the count of newly unreferenced classes
  // keeps changing.
  int old_no_ref = -1;
  int new_no_ref = 0;
  std::unordered_set<DexType*> coldstart_classes(interdex_types.begin(),
                                                 interdex_types.end());
  std::unordered_set<DexType*> cold_cold_references;
  std::unordered_set<DexClass*> unreferenced_classes;
  Scope input_scope = scope;
  // Don't do analysis if we're not doing pruning.
  if (!static_prune_classes) {
    return unreferenced_classes;
  }
  while (old_no_ref != new_no_ref) {
    old_no_ref = new_no_ref;
    new_no_ref = 0;
    cold_cold_references.clear();
    walk::code(
        input_scope,
        // Only scan code of methods declared on coldstart classes.
        [&](DexMethod* meth) {
          return coldstart_classes.count(meth->get_class()) > 0;
        },
        [&](DexMethod* meth, const IRCode& code) {
          auto base_cls = meth->get_class();
          for (auto& mie : InstructionIterable(meth->get_code())) {
            auto inst = mie.insn;
            DexType* called_cls = nullptr;
            // Any method/field/type operand counts as a reference.
            if (inst->has_method()) {
              called_cls = inst->get_method()->get_class();
            } else if (inst->has_field()) {
              called_cls = inst->get_field()->get_class();
            } else if (inst->has_type()) {
              called_cls = inst->get_type();
            }
            // Self-references don't keep a class alive.
            if (called_cls != nullptr && base_cls != called_cls &&
                coldstart_classes.count(called_cls) > 0) {
              cold_cold_references.insert(called_cls);
            }
          }
        });
    for (const auto& cls : scope) {
      // Make sure we don't drop classes which might be
      // called from native code.
      if (!can_rename(cls)) {
        cold_cold_references.insert(cls->get_type());
      }
    }
    // Get all classes in the reference set, even if they are not referenced by
    // opcodes directly.
    for (const auto& cls : input_scope) {
      if (cold_cold_references.count(cls->get_type())) {
        std::vector<DexType*> types;
        cls->gather_types(types);
        for (const auto& type : types) {
          cold_cold_references.insert(type);
        }
      }
    }
    Scope output_scope;
    for (auto& cls : coldstart_classes) {
      if (can_rename(type_class(cls)) && cold_cold_references.count(cls) == 0) {
        // Renamable and unreferenced: prunable.
        new_no_ref++;
        unreferenced_classes.insert(type_class(cls));
      } else {
        output_scope.push_back(type_class(cls));
      }
    }
    TRACE(IDEX, 4, "Found %d classes in coldstart with no references.",
          new_no_ref);
    // Next round only re-scans the survivors.
    input_scope = output_scope;
  }
  return unreferenced_classes;
}
// Accumulates into *mrefs/*frefs/*trefs/*itrefs every method, field, type and
// init-class reference that `cls` would contribute to a dex, including any
// extra refs the plugins declare for it. The output sets deduplicate.
void gather_refs(
    const std::vector<std::unique_ptr<interdex::InterDexPassPlugin>>& plugins,
    const interdex::DexInfo& dex_info,
    const DexClass* cls,
    interdex::MethodRefs* mrefs,
    interdex::FieldRefs* frefs,
    interdex::TypeRefs* trefs,
    interdex::TypeRefs* itrefs) {
  // References made directly by the class.
  std::vector<DexMethodRef*> methods;
  std::vector<DexFieldRef*> fields;
  std::vector<DexType*> types;
  std::vector<DexType*> init_types;
  cls->gather_methods(methods);
  cls->gather_fields(fields);
  cls->gather_types(types);
  cls->gather_init_classes(init_types);
  // Let every plugin append the references it will introduce for this class.
  for (const auto& plugin : plugins) {
    plugin->gather_refs(dex_info, cls, methods, fields, types, init_types);
  }
  // Fold everything into the caller-provided accumulator sets.
  mrefs->insert(methods.begin(), methods.end());
  frefs->insert(fields.begin(), fields.end());
  trefs->insert(types.begin(), types.end());
  itrefs->insert(init_types.begin(), init_types.end());
}
/**
 * Dumps the per-category dex counts and the global class/reference/member
 * totals of the final dexes structure to the IDEX trace log.
 */
void print_stats(interdex::DexesStructure* dexes_structure) {
  TRACE(IDEX, 2, "InterDex Stats:");
  TRACE(IDEX, 2, "\t dex count: %zu", dexes_structure->get_num_dexes());
  TRACE(IDEX, 2, "\t secondary dex count: %zu",
        dexes_structure->get_num_secondary_dexes());
  TRACE(IDEX, 2, "\t coldstart dex count: %zu",
        dexes_structure->get_num_coldstart_dexes());
  TRACE(IDEX, 2, "\t extended dex count: %zu",
        dexes_structure->get_num_extended_dexes());
  TRACE(IDEX, 2, "\t scroll dex count: %zu",
        dexes_structure->get_num_scroll_dexes());
  TRACE(IDEX, 2, "Global stats:");
  // %zu is the portable specifier for size_t (%lu is wrong on LLP64).
  TRACE(IDEX, 2, "\t %zu classes", dexes_structure->get_num_classes());
  TRACE(IDEX, 2, "\t %zu mrefs", dexes_structure->get_num_mrefs());
  TRACE(IDEX, 2, "\t %zu frefs", dexes_structure->get_num_frefs());
  TRACE(IDEX, 2, "\t %zu dmethods", dexes_structure->get_num_dmethods());
  TRACE(IDEX, 2, "\t %zu vmethods", dexes_structure->get_num_vmethods());
}
/**
 * Order the classes in `scope` according to the `coldstart_class_names`.
 * Listed classes are marked perf-sensitive and stable-sorted to the front in
 * list order; all remaining classes keep their original relative order.
 */
void do_order_classes(const std::vector<std::string>& coldstart_class_names,
                      Scope* scope) {
  std::unordered_map<const DexClass*, uint32_t> class_to_priority;
  uint32_t priority = 0;
  for (const auto& class_name : coldstart_class_names) {
    // Names without a resolvable type/class are silently ignored.
    if (DexType* type = DexType::get_type(class_name.c_str())) {
      if (auto cls = type_class(type)) {
        class_to_priority[cls] = priority++;
        cls->set_perf_sensitive(true);
      }
    }
  }
  // %u matches the unsigned counter (was %d, a signed/unsigned mismatch).
  TRACE(IDEX, 3, "IDEX: Ordered around %u classes at the beginning", priority);
  std::stable_sort(
      scope->begin(), scope->end(),
      [&class_to_priority](const DexClass* left, const DexClass* right) {
        // Unlisted classes get the max priority: they sort after all listed
        // classes and compare equal among themselves, so stable_sort keeps
        // their original relative order.
        uint32_t left_priority = std::numeric_limits<uint32_t>::max();
        uint32_t right_priority = std::numeric_limits<uint32_t>::max();
        auto it = class_to_priority.find(left);
        if (it != class_to_priority.end()) {
          left_priority = it->second;
        }
        it = class_to_priority.find(right);
        if (it != class_to_priority.end()) {
          right_priority = it->second;
        }
        return left_priority < right_priority;
      });
}
} // namespace
namespace interdex {
// A canary class is recognized purely by the "Lsecondary/dex" prefix of its
// type name.
bool is_canary(DexClass* clazz) {
  return boost::algorithm::starts_with(clazz->get_type()->get_name()->str(),
                                       SECONDARY_CANARY_PREFIX);
}
// Compare two classes for sorting in a way that is best for compression.
// Implements a strict weak ordering: canaries last, interfaces after regular
// classes, supertypes/implemented interfaces after their subtypes, then
// grouped by superclass and interface list, then by member-count deltas, and
// finally by type name as a deterministic tie-breaker.
bool compare_dexclasses_for_compressed_size(DexClass* c1, DexClass* c2) {
  // Canary classes go last
  if (interdex::is_canary(c1) != interdex::is_canary(c2)) {
    return (interdex::is_canary(c1) ? 1 : 0) <
           (interdex::is_canary(c2) ? 1 : 0);
  }
  // Interfaces go after non-interfaces
  if (is_interface(c1) != is_interface(c2)) {
    return (is_interface(c1) ? 1 : 0) < (is_interface(c2) ? 1 : 0);
  }
  // Base types and implemented interfaces go last
  if (type::check_cast(c2->get_type(), c1->get_type())) {
    return false;
  }
  // Comparator must never be handed the same class on both sides.
  always_assert(c1 != c2);
  if (type::check_cast(c1->get_type(), c2->get_type())) {
    return true;
  }
  // If types are unrelated, sort by super-classes and then
  // interfaces
  if (c1->get_super_class() != c2->get_super_class()) {
    return compare_dextypes(c1->get_super_class(), c2->get_super_class());
  }
  if (c1->get_interfaces() != c2->get_interfaces()) {
    return compare_dextypelists(c1->get_interfaces(), c2->get_interfaces());
  }
  // Tie-breaker: fields/methods count distance
  int dmethods_distance =
      (int)c1->get_dmethods().size() - (int)c2->get_dmethods().size();
  if (dmethods_distance != 0) {
    return dmethods_distance < 0;
  }
  int vmethods_distance =
      (int)c1->get_vmethods().size() - (int)c2->get_vmethods().size();
  if (vmethods_distance != 0) {
    return vmethods_distance < 0;
  }
  int ifields_distance =
      (int)c1->get_ifields().size() - (int)c2->get_ifields().size();
  if (ifields_distance != 0) {
    return ifields_distance < 0;
  }
  int sfields_distance =
      (int)c1->get_sfields().size() - (int)c2->get_sfields().size();
  if (sfields_distance != 0) {
    return sfields_distance < 0;
  }
  // Tie-breaker: has-class-data
  if (c1->has_class_data() != c2->has_class_data()) {
    return (c1->has_class_data() ? 1 : 0) < (c2->has_class_data() ? 1 : 0);
  }
  // Final tie-breaker: Compare types, which means names
  return compare_dextypes(c1->get_type(), c2->get_type());
}
// Returns true (and logs the vetoing plugin) if any registered plugin asks to
// skip `clazz`. Short-circuits on the first veto, like the original loop.
bool InterDex::should_skip_class_due_to_plugin(DexClass* clazz) {
  return std::any_of(
      m_plugins.begin(), m_plugins.end(), [&](const auto& plugin) {
        if (!plugin->should_skip_class(clazz)) {
          return false;
        }
        TRACE(IDEX, 4, "IDEX: Skipping class from %s :: %s",
              plugin->name().c_str(), SHOW(clazz));
        return true;
      });
}
// Notifies every plugin that `cls` is now effectively part of the scope.
void InterDex::add_to_scope(DexClass* cls) {
  std::for_each(m_plugins.begin(), m_plugins.end(),
                [cls](auto& plugin) { plugin->add_to_scope(cls); });
}
bool InterDex::should_not_relocate_methods_of_class(const DexClass* clazz) {
for (const auto& plugin : m_plugins) {
if (plugin->should_not_relocate_methods_of_class(clazz)) {
TRACE(IDEX, 4, "IDEX: Not relocating methods of class from %s :: %s",
plugin->name().c_str(), SHOW(clazz));
return true;
}
}
return false;
}
/*
 * Tries to place `clazz` into the current dex. If its refs no longer fit, the
 * current dex is flushed and a fresh canary is created via *canary_cls.
 * Overflow protocol: when the caller passes a non-null `overflowed`, the class
 * is NOT re-added after the flush and {false, true} is returned — the caller
 * is expected to retry the class itself. When `overflowed` is null, refs are
 * re-gathered and the class is force-added to the new dex.
 * Returns {emitted, dex-was-flushed} — presumably EmitResult's two fields;
 * confirm against the header.
 */
InterDex::EmitResult InterDex::emit_class(DexInfo& dex_info,
                                          DexClass* clazz,
                                          bool check_if_skip,
                                          bool perf_sensitive,
                                          DexClass** canary_cls,
                                          bool* overflowed) {
  if (is_canary(clazz)) {
    // Nothing to do here.
    return {false, false};
  }
  if (m_dexes_structure.has_class(clazz)) {
    TRACE(IDEX, 6, "Trying to re-add class %s!", SHOW(clazz));
    return {false, false};
  }
  if (check_if_skip && (should_skip_class_due_to_plugin(clazz))) {
    return {false, false};
  }
  if (perf_sensitive) {
    clazz->set_perf_sensitive(true);
  }
  // Calculate the extra method and field refs that we would need to add to
  // the current dex if we defined clazz in it.
  MethodRefs clazz_mrefs;
  FieldRefs clazz_frefs;
  TypeRefs clazz_trefs;
  TypeRefs clazz_itrefs;
  gather_refs(m_plugins, dex_info, clazz, &clazz_mrefs, &clazz_frefs,
              &clazz_trefs, &clazz_itrefs);
  bool fits_current_dex = m_dexes_structure.add_class_to_current_dex(
      clazz_mrefs, clazz_frefs, clazz_trefs, clazz_itrefs, clazz);
  if (!fits_current_dex) {
    // Current dex is full: flush it and start the next one with a new canary.
    flush_out_dex(dex_info, *canary_cls);
    *canary_cls = get_canary_cls(dex_info);
    if (overflowed) {
      // Caller opted into handling overflow; hand the class back unplaced.
      *overflowed = true;
      return {false, true};
    }
    // Plugins may maintain internal state after gathering refs, and
    // then they tend to forget that state after flushing out (class
    // merging, looking at you). So, let's redo gathering of refs here
    // to give plugins a chance to rebuild their internal state.
    clazz_mrefs.clear();
    clazz_frefs.clear();
    clazz_trefs.clear();
    clazz_itrefs.clear();
    gather_refs(m_plugins, dex_info, clazz, &clazz_mrefs, &clazz_frefs,
                &clazz_trefs, &clazz_itrefs);
    m_dexes_structure.add_class_no_checks(clazz_mrefs, clazz_frefs, clazz_trefs,
                                          clazz_itrefs, clazz);
  }
  return {true, !fits_current_dex};
}
/*
 * Re-emits the primary dex's classes into a single dex: first the primary
 * classes that appear in the interdex (coldstart) order, in that order and
 * marked perf-sensitive; then all remaining primary classes. Asserts that
 * everything still fits in exactly one dex.
 */
void InterDex::emit_primary_dex(
    const DexClasses& primary_dex,
    const std::vector<DexType*>& interdex_order,
    const std::unordered_set<DexClass*>& unreferenced_classes) {
  std::unordered_set<DexClass*> primary_dex_set(primary_dex.begin(),
                                                primary_dex.end());
  DexInfo primary_dex_info;
  primary_dex_info.primary = true;
  size_t coldstart_classes_in_primary = 0;
  size_t coldstart_classes_skipped_in_primary = 0;
  // Sort the primary dex according to interdex order (aka emit first the
  // primary classes that appear in the interdex order, in the order that
  // they appear there).
  for (DexType* type : interdex_order) {
    DexClass* cls = type_class(type);
    if (!cls) {
      continue;
    }
    if (primary_dex_set.count(cls) > 0) {
      if (unreferenced_classes.count(cls) > 0) {
        TRACE(IDEX, 5, "[primary dex]: %s no longer linked to coldstart set.",
              SHOW(cls));
        coldstart_classes_skipped_in_primary++;
        continue;
      }
      emit_class(primary_dex_info, cls, /* check_if_skip */ true,
                 /* perf_sensitive */ true, /* canary_cls */ nullptr);
      coldstart_classes_in_primary++;
    }
  }
  // Now add the rest.
  // (emit_class is a no-op for classes already emitted above.)
  for (DexClass* cls : primary_dex) {
    emit_class(primary_dex_info, cls, /* check_if_skip */ true,
               /* perf_sensitive */ false, /* canary_cls */ nullptr);
  }
  TRACE(IDEX, 2,
        "[primary dex]: %zu out of %zu classes in primary dex "
        "from interdex list.",
        coldstart_classes_in_primary, primary_dex.size());
  TRACE(IDEX, 2,
        "[primary dex]: %zu out of %zu classes in primary dex skipped "
        "from interdex list.",
        coldstart_classes_skipped_in_primary, primary_dex.size());
  flush_out_dex(primary_dex_info, /* canary_cls */ nullptr);
  // Double check only 1 dex was created.
  always_assert_log(
      m_dexes_structure.get_num_dexes() == 1,
      "[error]: Primary dex doesn't fit in only 1 dex anymore :|, but in %zu\n",
      m_dexes_structure.get_num_dexes());
}
/*
 * Walks the interdex (betamap) type list in order, emitting real classes and
 * interpreting synthetic marker types (scroll/background set boundaries and
 * dex-end markers) as a small state machine over m_emitting_* flags.
 * End markers normally force a dex flush; with m_fill_last_coldstart_dex the
 * final coldstart end marker is deferred so the dex can be topped up, and the
 * coldstart flag is cleared at the next natural overflow instead.
 */
void InterDex::emit_interdex_classes(
    DexInfo& dex_info,
    const std::vector<DexType*>& interdex_types,
    const std::unordered_set<DexClass*>& unreferenced_classes,
    DexClass** canary_cls) {
  if (interdex_types.empty()) {
    TRACE(IDEX, 2, "No interdex classes passed.");
    return;
  }
  // NOTE: coldstart has no interaction with extended and scroll set, but that
  // is not true for the later 2.
  dex_info.coldstart = true;
  size_t cls_skipped_in_secondary = 0;
  bool reset_coldstart_on_overflow = false;
  for (auto it = interdex_types.begin(); it != interdex_types.end(); ++it) {
    DexType* type = *it;
    DexClass* cls = type_class(type);
    if (!cls) {
      // No class definition: this entry is one of the synthetic marker types.
      TRACE(IDEX, 5, "[interdex classes]: No such entry %s.", SHOW(type));
      if (boost::algorithm::starts_with(type->get_name()->str(),
                                        SCROLL_SET_START_FORMAT)) {
        // Scroll sets must not nest or overlap background sets.
        always_assert_log(
            !m_emitting_scroll_set,
            "Scroll start marker discovered after another scroll start marker");
        always_assert_log(
            !m_emitting_bg_set,
            "Scroll start marker discovered between background set markers");
        m_emitting_scroll_set = true;
        TRACE(IDEX, 2, "Marking dex as scroll at betamap entry %zu",
              std::distance(interdex_types.begin(), it));
        dex_info.scroll = true;
      } else if (boost::algorithm::starts_with(type->get_name()->str(),
                                               SCROLL_SET_END_FORMAT)) {
        always_assert_log(
            m_emitting_scroll_set,
            "Scroll end marker discovered without scroll start marker");
        m_emitting_scroll_set = false;
      } else if (boost::algorithm::starts_with(type->get_name()->str(),
                                               BG_SET_START_FORMAT)) {
        always_assert_log(!m_emitting_bg_set,
                          "Background start marker discovered after another "
                          "background start marker");
        always_assert_log(
            !m_emitting_scroll_set,
            "Background start marker discovered between scroll set markers");
        TRACE(IDEX, 2, "Marking dex as background at betamap entry %zu",
              std::distance(interdex_types.begin(), it));
        m_emitting_bg_set = true;
        dex_info.background = true;
      } else if (boost::algorithm::starts_with(type->get_name()->str(),
                                               BG_SET_END_FORMAT)) {
        always_assert_log(
            m_emitting_bg_set,
            "Background end marker discovered without background start marker");
        m_emitting_bg_set = false;
        m_emitted_bg_set = true;
      } else {
        auto end_marker =
            std::find(m_end_markers.begin(), m_end_markers.end(), type);
        // Cold start end marker is the last dex end marker
        auto cold_start_end_marker = !m_end_markers.empty()
                                         ? m_end_markers.end() - 1
                                         : m_end_markers.end();
        if (end_marker != m_end_markers.end()) {
          always_assert_log(
              !m_emitting_scroll_set,
              "End marker discovered between scroll start/end markers");
          always_assert_log(
              !m_emitting_bg_set,
              "End marker discovered between background start/end markers");
          TRACE(IDEX, 2, "Terminating dex due to %s", SHOW(type));
          // Flush unless this is the last coldstart end marker and we want to
          // keep filling that dex (m_fill_last_coldstart_dex).
          if (end_marker != cold_start_end_marker ||
              !m_fill_last_coldstart_dex || m_end_markers.size() == 1) {
            flush_out_dex(dex_info, *canary_cls);
            *canary_cls = get_canary_cls(dex_info);
            if (end_marker == cold_start_end_marker) {
              dex_info.coldstart = false;
            }
          } else {
            // Defer the flush; drop the coldstart flag at the next overflow.
            reset_coldstart_on_overflow = true;
            TRACE(IDEX, 2, "Not flushing out marker %s to fill dex.",
                  SHOW(type));
          }
        }
      }
    } else {
      if (unreferenced_classes.count(cls)) {
        TRACE(IDEX, 3, "%s no longer linked to coldstart set.", SHOW(cls));
        cls_skipped_in_secondary++;
        continue;
      }
      // First class after a completed background set starts the extended set.
      if (m_emitted_bg_set) {
        m_emitted_bg_set = false;
        dex_info.extended = true;
        m_emitting_extended = true;
      }
      dex_info.betamap_ordered = true;
      auto res = emit_class(dex_info, cls, /* check_if_skip */ true,
                            /* perf_sensitive */ true, canary_cls);
      if (res.overflowed && reset_coldstart_on_overflow) {
        // The deferred coldstart end marker takes effect on this overflow.
        dex_info.coldstart = false;
        reset_coldstart_on_overflow = false;
        TRACE(IDEX, 2, "Flushing cold-start after non-flushed end marker.");
      }
    }
  }
  // Now emit the classes we omitted from the original coldstart set.
  TRACE(IDEX, 2, "Emitting %zu interdex types (reset_coldstart_on_overflow=%d)",
        interdex_types.size(), reset_coldstart_on_overflow);
  for (DexType* type : interdex_types) {
    DexClass* cls = type_class(type);
    if (cls && unreferenced_classes.count(cls)) {
      auto res = emit_class(dex_info, cls, /* check_if_skip */ true,
                            /* perf_sensitive */ false, canary_cls);
      if (res.overflowed && reset_coldstart_on_overflow) {
        dex_info.coldstart = false;
        reset_coldstart_on_overflow = false;
        TRACE(IDEX, 2, "Flushing cold-start after non-flushed end marker.");
      }
    }
  }
  TRACE(IDEX, 3,
        "[interdex order]: %zu classes are unreferenced from the interdex "
        "order in secondary dexes.",
        cls_skipped_in_secondary);
  // TODO: check for unterminated markers
  always_assert_log(!m_emitting_scroll_set, "Unterminated scroll set marker");
  always_assert_log(!m_emitting_bg_set, "Unterminated background set marker");
  m_emitting_extended = false;
  // If the deferred flush never happened via overflow, do it now.
  if (reset_coldstart_on_overflow) {
    TRACE(IDEX, 2, "No overflow after cold-start dex, flushing now.");
    flush_out_dex(dex_info, *canary_cls);
    *canary_cls = get_canary_cls(dex_info);
    dex_info.coldstart = false;
  }
}
namespace {
/**
* Grab classes that should end up in a pre-defined interdex group.
*/
std::vector<std::vector<DexType*>> get_extra_classes_per_interdex_group(
const Scope& scope) {
std::vector<std::vector<DexType*>> res(MAX_DEX_NUM);
InterdexSubgroupIdx num_interdex_groups = 0;
walk::classes(scope, [&](const DexClass* cls) {
if (cls->rstate.has_interdex_subgroup()) {
InterdexSubgroupIdx interdex_subgroup =
cls->rstate.get_interdex_subgroup();
res[interdex_subgroup].push_back(cls->get_type());
num_interdex_groups =
std::max(num_interdex_groups, interdex_subgroup + 1);
}
});
res.resize(num_interdex_groups);
return res;
}
} // namespace
/*
 * Builds m_interdex_types from the configured coldstart class list. Marker
 * entries (dex-end, scroll/bg set boundaries) are materialized as synthetic
 * DexTypes; end markers also splice in the generated classes of the matching
 * interdex subgroup. When m_transitively_close_interdex_order is set, each
 * listed class drags its super types and interfaces into the order ahead of
 * itself, and duplicate/moved entries are tallied for stats.
 */
void InterDex::load_interdex_types() {
  always_assert(m_interdex_types.empty());
  const std::vector<std::string>& interdexorder =
      m_conf.get_coldstart_classes();
  // Find generated classes that should be in the interdex order.
  std::vector<std::vector<DexType*>> interdex_group_classes =
      get_extra_classes_per_interdex_group(m_scope);
  size_t curr_interdex_group = 0;
  std::unordered_set<DexClass*> classes(m_scope.begin(), m_scope.end());
  // all_set tracks every type already placed in the order (or in the primary
  // dex), so transitive closure doesn't add duplicates.
  std::unordered_set<DexType*> all_set{};
  if (m_transitively_close_interdex_order && !m_force_single_dex) {
    for (auto* cls : m_dexen[0]) {
      all_set.insert(cls->get_type()); // Ignore primary.
    }
  }
  std::unordered_set<DexType*> moved_or_double{};
  std::unordered_set<DexType*> transitive_added{};
  for (const auto& entry : interdexorder) {
    DexType* type = DexType::get_type(entry.c_str());
    if (!type) {
      // Unknown type name: either a marker entry (materialize it) or an
      // obsolete entry (skip it).
      if (boost::algorithm::starts_with(entry, END_MARKER_FORMAT)) {
        type = DexType::make_type(entry.c_str());
        m_end_markers.emplace_back(type);
        // An end marker also flushes the pending interdex subgroup classes
        // into the order at this position.
        if (interdex_group_classes.size() > curr_interdex_group) {
          for (DexType* extra_type :
               interdex_group_classes.at(curr_interdex_group)) {
            m_interdex_types.emplace_back(extra_type);
          }
          curr_interdex_group++;
        }
        TRACE(IDEX, 4, "[interdex order]: Found class end marker %s.",
              entry.c_str());
      } else if (boost::algorithm::starts_with(entry,
                                               SCROLL_SET_START_FORMAT)) {
        type = DexType::make_type(entry.c_str());
        TRACE(IDEX, 4,
              "[interdex order]: Found scroll set start class marker %s.",
              entry.c_str());
      } else if (boost::algorithm::starts_with(entry, SCROLL_SET_END_FORMAT)) {
        type = DexType::make_type(entry.c_str());
        TRACE(IDEX, 4,
              "[interdex order]: Found scroll set end class marker %s.",
              entry.c_str());
      } else if (boost::algorithm::starts_with(entry, BG_SET_START_FORMAT)) {
        type = DexType::make_type(entry.c_str());
        TRACE(IDEX, 4, "[interdex order]: Found bg set start class marker %s.",
              entry.c_str());
      } else if (boost::algorithm::starts_with(entry, BG_SET_END_FORMAT)) {
        type = DexType::make_type(entry.c_str());
        TRACE(IDEX, 4, "[interdex order]: Found bg set end class marker %s.",
              entry.c_str());
      } else {
        continue;
      }
    } else {
      DexClass* cls = type_class(type);
      if (!cls || classes.count(cls) == 0) {
        continue;
      }
      if (cls->rstate.has_interdex_subgroup()) {
        // Skipping generated classes that should end up in a specific group.
        continue;
      }
      if (m_transitively_close_interdex_order) {
        if (all_set.count(type) != 0) {
          // Moved earlier.
          moved_or_double.insert(type);
          continue;
        }
        // Transitive closure.
        // Recursively emits superclass, then interfaces, then the type itself
        // (add_self is false only for the root, which is appended below).
        self_recursive_fn(
            [&](const auto& self, DexType* cur, bool add_self) {
              DexClass* cur_cls = type_class(cur);
              if (!cur_cls || classes.count(cur_cls) == 0 ||
                  all_set.count(cur) != 0 ||
                  cur_cls->rstate.has_interdex_subgroup()) {
                return;
              }
              all_set.insert(cur);
              if (add_self) {
                transitive_added.insert(cur);
              }
              // Superclass first.
              self(self, cur_cls->get_super_class(), true);
              // Then interfaces.
              for (auto* intf : *cur_cls->get_interfaces()) {
                self(self, intf, true);
              }
              // Then self.
              if (add_self) {
                m_interdex_types.emplace_back(cur);
              }
            },
            type, false);
      }
    }
    m_interdex_types.emplace_back(type);
  }
  // We still want to add the ones in the last interdex group, if any.
  always_assert_log(interdex_group_classes.size() <= curr_interdex_group + 2,
                    "Too many interdex subgroups!\n");
  if (interdex_group_classes.size() > curr_interdex_group) {
    for (DexType* type : interdex_group_classes.at(curr_interdex_group)) {
      // TODO: Does the above need to filter? Do we need to transitively close
      // here, too?
      if (!m_transitively_close_interdex_order || all_set.count(type) == 0) {
        m_interdex_types.push_back(type);
      }
    }
  }
  if (m_transitively_close_interdex_order) {
    // Split the duplicates into "moved by closure" vs "purely added" for the
    // stats counters.
    std::unordered_set<DexType*> transitive_moved;
    for (auto* t : moved_or_double) {
      if (transitive_added.count(t) != 0) {
        transitive_moved.insert(t);
        transitive_added.erase(t);
      }
    }
    m_transitive_closure_added = transitive_added.size();
    m_transitive_closure_moved = transitive_moved.size();
  }
}
void InterDex::update_interdexorder(const DexClasses& dex,
std::vector<DexType*>* interdex_types) {
std::vector<DexType*> primary_dex;
for (DexClass* cls : dex) {
primary_dex.emplace_back(cls->get_type());
}
// We keep the primary classes untouched - at the beginning of
// the interdex list.
interdex_types->insert(interdex_types->begin(), primary_dex.begin(),
primary_dex.end());
}
/*
 * Prepares cross-dex ref minimization: optionally creates the cross-dex
 * relocator, extracts relocatable methods into helper classes, then samples
 * and inserts all remaining (non-emitted, non-skipped) classes into the
 * minimizer, including classes already sitting in the current dex.
 */
void InterDex::init_cross_dex_ref_minimizer_and_relocate_methods() {
  TRACE(IDEX, 2,
        "[dex ordering] Cross-dex-ref-minimizer active with method ref weight "
        "%" PRIu64 ", field ref weight %" PRIu64 ", type ref weight %" PRIu64
        ", string ref weight %" PRIu64 ", method seed weight %" PRIu64
        ", field seed weight %" PRIu64 ", type seed weight %" PRIu64
        ", string seed weight %" PRIu64 ".",
        m_cross_dex_ref_minimizer.get_config().method_ref_weight,
        m_cross_dex_ref_minimizer.get_config().field_ref_weight,
        m_cross_dex_ref_minimizer.get_config().type_ref_weight,
        m_cross_dex_ref_minimizer.get_config().string_ref_weight,
        m_cross_dex_ref_minimizer.get_config().method_seed_weight,
        m_cross_dex_ref_minimizer.get_config().field_seed_weight,
        m_cross_dex_ref_minimizer.get_config().type_seed_weight,
        m_cross_dex_ref_minimizer.get_config().string_seed_weight);
  // Only build the relocator if at least one relocation kind is enabled.
  // NOTE(review): raw `new` — ownership/deletion of m_cross_dex_relocator is
  // presumably handled elsewhere (e.g. cleanup/destructor); confirm.
  if (m_cross_dex_relocator_config.relocate_static_methods ||
      m_cross_dex_relocator_config.relocate_non_static_direct_methods ||
      m_cross_dex_relocator_config.relocate_virtual_methods) {
    m_cross_dex_relocator =
        new CrossDexRelocator(m_cross_dex_relocator_config, m_original_scope,
                              m_xstore_refs, m_dexes_structure);
    TRACE(IDEX, 2,
          "[dex ordering] Cross-dex-relocator active, max relocated methods "
          "per class: %" PRIu64
          ", relocating static methods: %s"
          ", non-static direct methods: %s"
          ", virtual methods: %s",
          m_cross_dex_relocator_config.max_relocated_methods_per_class,
          m_cross_dex_relocator_config.relocate_static_methods ? "yes" : "no",
          m_cross_dex_relocator_config.relocate_non_static_direct_methods
              ? "yes"
              : "no",
          m_cross_dex_relocator_config.relocate_virtual_methods ? "yes" : "no");
  }
  std::vector<DexClass*> classes_to_insert;
  // Emit classes using some algorithm to group together classes which
  // tend to share the same refs.
  for (DexClass* cls : m_scope) {
    // Don't bother with classes that emit_class will skip anyway.
    // (Postpone checking should_skip_class until after we have possibly
    // extracted relocatable methods.)
    if (is_canary(cls) || m_dexes_structure.has_class(cls)) {
      continue;
    }
    if (m_cross_dex_relocator != nullptr &&
        !should_not_relocate_methods_of_class(cls)) {
      std::vector<DexClass*> relocated_classes;
      m_cross_dex_relocator->relocate_methods(cls, relocated_classes);
      for (DexClass* relocated_cls : relocated_classes) {
        // Tell all plugins that the new class is now effectively part of the
        // scope.
        add_to_scope(relocated_cls);
        // It's important to call should_skip_class here, as some plugins
        // build up state for each class via this call.
        always_assert(!should_skip_class_due_to_plugin(relocated_cls));
        m_cross_dex_ref_minimizer.ignore(relocated_cls);
        classes_to_insert.emplace_back(relocated_cls);
      }
    }
    // Don't bother with classes that emit_class will skip anyway
    if (should_skip_class_due_to_plugin(cls)) {
      // Skipping a class due to a plugin might mean that (members of) of the
      // class will get emitted later via the additional-class mechanism.
      // So we'll also sample those classes here.
      m_cross_dex_ref_minimizer.sample(cls);
      continue;
    }
    classes_to_insert.emplace_back(cls);
  }
  // Initialize ref frequency counts
  for (DexClass* cls : classes_to_insert) {
    m_cross_dex_ref_minimizer.sample(cls);
  }
  // Emit classes using some algorithm to group together classes which
  // tend to share the same refs.
  for (DexClass* cls : classes_to_insert) {
    m_cross_dex_ref_minimizer.insert(cls);
  }
  // A few classes might have already been emitted to the current dex which we
  // are about to fill up. Make it so that the minimizer knows that all the refs
  // of those classes have already been emitted.
  for (auto cls : m_dexes_structure.get_current_dex_classes()) {
    m_cross_dex_ref_minimizer.sample(cls);
    m_cross_dex_ref_minimizer.insert(cls);
    m_cross_dex_ref_minimizer.erase(cls, /* emitted */ true,
                                    /* overflowed */ false);
  }
}
/*
 * Emits every class not yet placed by the primary/interdex phases. Without
 * cross-dex-ref minimization this is a plain scope-order pass; with it, each
 * dex starts with the "worst" class (most unapplied refs) and then greedily
 * picks classes sharing the most already-applied refs. On overflow the class
 * stays in the minimizer and is retried in the fresh dex.
 */
void InterDex::emit_remaining_classes(DexInfo& dex_info,
                                      DexClass** canary_cls) {
  m_current_classes_when_emitting_remaining =
      m_dexes_structure.get_current_dex_classes().size();
  if (!m_minimize_cross_dex_refs) {
    for (DexClass* cls : m_scope) {
      emit_class(dex_info, cls, /* check_if_skip */ true,
                 /* perf_sensitive */ false, canary_cls);
    }
    return;
  }
  init_cross_dex_ref_minimizer_and_relocate_methods();
  // Strategy for picking the next class to emit:
  // - at the beginning of a new dex, pick the "worst" class, i.e. the class
  //   with the most (adjusted) unapplied refs
  // - otherwise, pick the "best" class according to the priority scheme that
  //   prefers classes that share many applied refs and bring in few unapplied
  //   refs
  bool pick_worst = true;
  while (!m_cross_dex_ref_minimizer.empty()) {
    DexClass* cls{nullptr};
    if (pick_worst) {
      // Figure out which class has the most unapplied references
      auto worst = m_cross_dex_ref_minimizer.worst();
      // Use that worst class if it has more unapplied refs than already applied
      // refs
      if (m_cross_dex_ref_minimizer.get_unapplied_refs(worst) >
          m_cross_dex_ref_minimizer.get_applied_refs()) {
        cls = worst;
      }
    }
    if (!cls) {
      // Default case
      cls = m_cross_dex_ref_minimizer.front();
    }
    bool overflowed = false;
    bool emitted =
        emit_class(dex_info, cls, /* check_if_skip */ false,
                   /* perf_sensitive */ false, canary_cls, &overflowed);
    if (overflowed) {
      always_assert(!emitted);
      // A new dex was started: reset the minimizer's applied-refs state and
      // retry this class (it was not erased from the minimizer).
      m_cross_dex_ref_minimizer.reset();
      pick_worst = true;
      if (m_cross_dex_relocator != nullptr) {
        m_cross_dex_relocator->current_dex_overflowed();
      }
      continue;
    }
    m_cross_dex_ref_minimizer.erase(cls, emitted, overflowed);
    if (m_cross_dex_relocator != nullptr) {
      // Let's merge relocated helper classes
      m_cross_dex_relocator->add_to_current_dex(cls);
    }
    // Stay in "pick worst" mode only until something actually lands in the
    // (new) dex.
    pick_worst = pick_worst && !emitted;
  }
}
// Tears down relocator state, if a relocator was created.
void InterDex::cleanup(const Scope& final_scope) {
  if (m_cross_dex_relocator == nullptr) {
    return;
  }
  m_cross_dex_relocator->cleanup(final_scope);
}
// Single-dex mode: orders the scope by the coldstart list (if any) and stuffs
// every class into one dex without fit checks; overflow is detected later in
// the pipeline (e.g. write_classes_to_dex).
void InterDex::run_in_force_single_dex_mode() {
  auto scope = build_class_scope(m_dexen);
  const std::vector<std::string>& coldstart_class_names =
      m_conf.get_coldstart_classes();
  DexInfo dex_info;
  dex_info.primary = true;
  if (!coldstart_class_names.empty()) {
    dex_info.coldstart = true;
    do_order_classes(coldstart_class_names, &scope);
  } else {
    TRACE(IDEX, 3, "IDEX single dex mode: No coldstart_classes");
  }
  // Add all classes into m_dexes_structure without further checking when
  // force_single_dex is on. The overflow checking will be done later on at
  // the end of the pipeline (e.g. write_classes_to_dex).
  for (DexClass* cls : scope) {
    MethodRefs mrefs;
    FieldRefs frefs;
    TypeRefs trefs;
    TypeRefs itrefs;
    gather_refs(m_plugins, dex_info, cls, &mrefs, &frefs, &trefs, &itrefs);
    m_dexes_structure.add_class_no_checks(mrefs, frefs, trefs, itrefs, cls);
  }
  // Emit all no matter what it is.
  if (!m_dexes_structure.get_current_dex_classes().empty()) {
    flush_out_dex(dex_info, /* canary_cls */ nullptr);
  }
  TRACE(IDEX, 7, "IDEX: force_single_dex dex number: %zu",
        m_dexes_structure.get_num_dexes());
  print_stats(&m_dexes_structure);
}
/*
 * Main entry point for the root store: prunes unreferenced coldstart classes,
 * (re-)emits the primary dex, emits the interdex-ordered classes, then all
 * remaining classes and plugin leftovers, flushes the final dex, writes the
 * dex_manifest.txt asset, and checks the dex-count budget.
 */
void InterDex::run() {
  TRACE(IDEX, 2, "IDEX: Running on root store");
  if (m_force_single_dex) {
    run_in_force_single_dex_mode();
    return;
  }
  auto unreferenced_classes = find_unrefenced_coldstart_classes(
      m_scope, m_interdex_types, m_static_prune_classes);
  const auto& primary_dex = m_dexen[0];
  // We have a bunch of special logic for the primary dex which we only use if
  // we can't touch the primary dex.
  if (!m_normal_primary_dex) {
    emit_primary_dex(primary_dex, m_interdex_types, unreferenced_classes);
  } else {
    // NOTE: If primary dex is treated as a normal dex, we are going to modify
    //       it too, based on coldstart classes. If we can't remove the classes
    //       from the primary dex, we need to update the coldstart list to
    //       respect the primary dex.
    if (m_keep_primary_order && !m_interdex_types.empty()) {
      update_interdexorder(primary_dex, &m_interdex_types);
    }
  }
  // Emit interdex classes, if any.
  DexInfo dex_info;
  DexClass* canary_cls = get_canary_cls(dex_info);
  emit_interdex_classes(dex_info, m_interdex_types, unreferenced_classes,
                        &canary_cls);
  // Now emit the classes that weren't specified in the head or primary list.
  emit_remaining_classes(dex_info, &canary_cls);
  // Add whatever leftovers there are from plugins.
  for (const auto& plugin : m_plugins) {
    auto add_classes = plugin->leftover_classes();
    std::string name = plugin->name();
    for (DexClass* add_class : add_classes) {
      TRACE(IDEX, 4, "IDEX: Emitting %s-plugin generated leftover class :: %s",
            name.c_str(), SHOW(add_class));
      emit_class(dex_info, add_class, /* check_if_skip */ false,
                 /* perf_sensitive */ false, &canary_cls);
    }
  }
  // Emit what is left, if any.
  if (!m_dexes_structure.get_current_dex_classes().empty()) {
    flush_out_dex(dex_info, canary_cls);
    canary_cls = nullptr;
  }
  // Emit dex info manifest
  if (m_asset_manager.has_secondary_dex_dir()) {
    auto mixed_mode_file = m_asset_manager.new_asset_file("dex_manifest.txt");
    auto mixed_mode_fh = FileHandle(*mixed_mode_file);
    mixed_mode_fh.seek_end();
    std::stringstream ss;
    int ordinal = 0;
    // One line per emitted dex: name plus its DexInfo flags.
    for (const auto& info : m_dex_infos) {
      const auto& flags = std::get<1>(info);
      ss << std::get<0>(info) << ",ordinal=" << ordinal++
         << ",coldstart=" << flags.coldstart << ",extended=" << flags.extended
         << ",primary=" << flags.primary << ",scroll=" << flags.scroll
         << ",background=" << flags.background << std::endl;
    }
    write_str(mixed_mode_fh, ss.str());
    *mixed_mode_file = nullptr;
  }
  always_assert_log(!m_emit_canaries ||
                        m_dexes_structure.get_num_dexes() < MAX_DEX_NUM,
                    "Bailing, max dex number surpassed %zu\n",
                    m_dexes_structure.get_num_dexes());
  print_stats(&m_dexes_structure);
}
void InterDex::run_on_nonroot_store() {
TRACE(IDEX, 2, "IDEX: Running on non-root store");
auto canary_cls = get_canary_cls(EMPTY_DEX_INFO);
for (DexClass* cls : m_scope) {
emit_class(EMPTY_DEX_INFO, cls, /* check_if_skip */ false,
/* perf_sensitive */ false, &canary_cls);
}
// Emit what is left, if any.
if (!m_dexes_structure.get_current_dex_classes().empty()) {
flush_out_dex(EMPTY_DEX_INFO, canary_cls);
}
print_stats(&m_dexes_structure);
}
void InterDex::add_dexes_from_store(const DexStore& store) {
auto canary_cls = get_canary_cls(EMPTY_DEX_INFO);
const auto& dexes = store.get_dexen();
for (const DexClasses& classes : dexes) {
for (DexClass* cls : classes) {
emit_class(EMPTY_DEX_INFO, cls, /* check_if_skip */ false,
/* perf_sensitive */ false, &canary_cls);
}
}
flush_out_dex(EMPTY_DEX_INFO, canary_cls);
}
void InterDex::set_clinit_methods_if_needed(DexClass* cls) {
  using namespace dex_asm;

  if (m_methods_for_canary_clinit_reference.empty()) {
    // Nothing to reference; don't create a <clinit> at all.
    return;
  }

  // Create a static <clinit> method on the given (canary) class.
  auto void_no_args =
      DexProto::make_proto(type::_void(), DexTypeList::make_type_list({}));
  DexMethod* clinit =
      DexMethod::make_method(cls->get_type(),
                             DexString::make_string("<clinit>"), void_no_args)
          ->make_concrete(ACC_STATIC | ACC_CONSTRUCTOR, false);
  clinit->set_code(std::make_unique<IRCode>());
  cls->add_method(clinit);
  clinit->set_deobfuscated_name(show_deobfuscated(clinit));

  // Fill the body with one invoke-static per configured method, passing
  // zero-initialized arguments.
  auto* code = clinit->get_code();
  size_t registers_needed = 0;
  for (const auto& method_name : m_methods_for_canary_clinit_reference) {
    DexMethodRef* callee = DexMethod::get_method(method_name);
    if (callee == nullptr) {
      // Method isn't present in this build; nothing to do for it.
      continue;
    }
    std::vector<Operand> arg_regs;
    int64_t next_reg = 0;
    for (auto* arg_type : *callee->get_proto()->get_args()) {
      Operand arg_reg = {VREG, next_reg};
      char type_prefix = arg_type->get_name()->c_str()[0];
      if (type_prefix == 'J' || type_prefix == 'D') {
        // 8-byte primitive (long/double): needs a wide const and a
        // register pair.
        code->push_back(dasm(OPCODE_CONST_WIDE, {arg_reg, 0_L}));
        arg_regs.push_back(arg_reg);
        next_reg += 2;
      } else {
        // 4 or fewer bytes: a single register suffices.
        code->push_back(dasm(OPCODE_CONST, {arg_reg, 0_L}));
        arg_regs.push_back(arg_reg);
        ++next_reg;
      }
    }
    registers_needed = std::max(registers_needed, (size_t)next_reg);
    code->push_back(
        dasm(OPCODE_INVOKE_STATIC, callee, arg_regs.begin(), arg_regs.end()));
  }
  code->set_registers_size(registers_needed);
  code->push_back(dasm(OPCODE_RETURN_VOID));
}
DexClass* create_canary(int dexnum, const DexString* store_name) {
  const std::string canary_name = get_canary_name(dexnum, store_name);

  // Reuse an existing canary type/class when present; otherwise mint one.
  auto* canary_type = DexType::get_type(canary_name);
  if (canary_type == nullptr) {
    TRACE(IDEX, 4, "Warning, no canary class %s found.", canary_name.c_str());
    canary_type = DexType::make_type(canary_name.c_str());
  }

  auto* canary_cls = type_class(canary_type);
  if (canary_cls != nullptr) {
    return canary_cls;
  }

  ClassCreator creator(canary_type);
  creator.set_access(ACC_PUBLIC | ACC_ABSTRACT);
  creator.set_super(type::java_lang_Object());
  canary_cls = creator.create();
  // Don't rename the Canary we've created.
  canary_cls->rstate.set_keepnames();
  return canary_cls;
}
// Creates a canary class if necessary. (In particular, the primary dex never
// has a canary class.) This method should be called after flush_out_dex when
// beginning a new dex. Because the canary is later appended without checks,
// its implied references are registered here immediately so that dex limits
// are never exceeded.
DexClass* InterDex::get_canary_cls(DexInfo& dex_info) {
  if (!m_emit_canaries || dex_info.primary) {
    return nullptr;
  }
  const int dexnum = m_dexes_structure.get_num_dexes();
  auto* canary_cls = create_canary(dexnum);
  set_clinit_methods_if_needed(canary_cls);

  // Collect everything the canary references and account for it up-front,
  // without limit checks.
  MethodRefs mrefs;
  FieldRefs frefs;
  TypeRefs trefs;
  std::vector<DexType*> itrefs;
  canary_cls->gather_methods(mrefs);
  canary_cls->gather_fields(frefs);
  canary_cls->gather_types(trefs);
  canary_cls->gather_init_classes(itrefs);
  m_dexes_structure.add_refs_no_checks(
      mrefs, frefs, trefs, TypeRefs(itrefs.begin(), itrefs.end()));
  return canary_cls;
}
/**
 * Ends the current dex and appends its classes to m_outdex. This needs to be
 * called before getting to the next dex.
 *
 * Steps:
 *  - add the canary class (if any) to the current dex, with full ref checks;
 *  - let each plugin contribute additional classes (added without checks);
 *  - if enabled, reorder the dex's classes: perf-sensitive first, then
 *    method-profile-ordered, then the rest sorted for compressed size;
 *  - reset the per-dex flags on dex_info for the next dex.
 *
 * Fix: removed the unused local `dexnum` and the dead `additional_classes`
 * set, which was populated but never read.
 */
void InterDex::flush_out_dex(DexInfo& dex_info, DexClass* canary_cls) {
  if (dex_info.primary) {
    TRACE(IDEX, 2, "Writing out primary dex with %zu classes.",
          m_dexes_structure.get_current_dex_classes().size());
  } else {
    TRACE(IDEX, 2,
          "Writing out secondary dex number %zu, which is %s of coldstart, "
          "%s of extended set, %s of background set, %s scroll "
          "classes and has %zu classes.",
          m_dexes_structure.get_num_secondary_dexes() + 1,
          (dex_info.coldstart ? "part of" : "not part of"),
          (dex_info.extended ? "part of" : "not part of"),
          (dex_info.background ? "part of" : "not part of"),
          (dex_info.scroll ? "has" : "doesn't have"),
          m_dexes_structure.get_current_dex_classes().size());
  }

  // Add the Canary class, if any. Its type ref was already accounted for by
  // get_canary_cls, so inserting the class itself must succeed.
  if (canary_cls) {
    always_assert(
        m_dexes_structure.current_dex_has_tref(canary_cls->get_type()));
    // Properly try to insert the class.
    MethodRefs clazz_mrefs;
    FieldRefs clazz_frefs;
    TypeRefs clazz_trefs;
    TypeRefs clazz_itrefs;
    gather_refs(m_plugins, dex_info, canary_cls, &clazz_mrefs, &clazz_frefs,
                &clazz_trefs, &clazz_itrefs);
    bool canary_added = m_dexes_structure.add_class_to_current_dex(
        clazz_mrefs, clazz_frefs, clazz_trefs, clazz_itrefs, canary_cls);
    always_assert(canary_added);
    // Record canary name + flags; this feeds the dex_manifest.txt output.
    m_dex_infos.emplace_back(
        std::make_tuple(canary_cls->get_name()->str(), dex_info));
  }

  // Give each plugin a chance to add classes; these bypass ref-limit checks.
  // The class list is re-fetched per plugin because earlier plugins may have
  // grown the current dex.
  for (auto& plugin : m_plugins) {
    DexClasses classes = m_dexes_structure.get_current_dex_classes();
    const DexClasses& squashed_classes =
        m_dexes_structure.get_current_dex_squashed_classes();
    classes.insert(classes.end(), squashed_classes.begin(),
                   squashed_classes.end());
    for (auto cls : plugin->additional_classes(m_outdex, classes)) {
      TRACE(IDEX, 4, "IDEX: Emitting %s-plugin-generated class :: %s",
            plugin->name().c_str(), SHOW(cls));
      m_dexes_structure.add_class_no_checks(cls);
      // If this is the primary dex, or if there are any betamap-ordered classes
      // in this dex, then we treat the additional classes as perf-sensitive, to
      // be conservative.
      if (dex_info.primary || dex_info.betamap_ordered) {
        cls->set_perf_sensitive(true);
      }
    }
  }

  {
    auto classes = m_dexes_structure.end_dex(dex_info);
    if (m_sort_remaining_classes) {
      // Partition the dex's classes into three groups:
      //  1. perf-sensitive classes, keeping their current relative order;
      //  2. classes with a method-profile sort number, ordered by it;
      //  3. everything else, sorted for compressed size.
      std::vector<DexClass*> perf_sensitive_classes;
      using DexClassWithSortNum = std::pair<DexClass*, double>;
      std::vector<DexClassWithSortNum> classes_with_sort_num;
      std::vector<DexClass*> remaining_classes;
      using namespace method_profiles;
      // Copy intended!
      auto mpoc = *m_conf.get_global_config()
                       .get_config_by_name<MethodProfileOrderingConfig>(
                           "method_profile_order");
      mpoc.legacy_order = false;
      mpoc.min_appear_percent = 1.0f;
      dexmethods_profiled_comparator comparator(
          {}, &m_conf.get_method_profiles(), &mpoc);
      for (auto cls : classes) {
        if (cls->is_perf_sensitive()) {
          perf_sensitive_classes.push_back(cls);
          continue;
        }
        // A class's sort number is the minimum over its methods' sort
        // numbers; VERY_END means no method had profile data.
        double cls_sort_num = dexmethods_profiled_comparator::VERY_END;
        walk::methods(std::vector<DexClass*>{cls}, [&](DexMethod* method) {
          auto method_sort_num = comparator.get_overall_method_sort_num(method);
          if (method_sort_num < cls_sort_num) {
            cls_sort_num = method_sort_num;
          }
        });
        if (cls_sort_num < dexmethods_profiled_comparator::VERY_END) {
          classes_with_sort_num.emplace_back(cls, cls_sort_num);
          continue;
        }
        remaining_classes.push_back(cls);
      }
      always_assert(perf_sensitive_classes.size() +
                        classes_with_sort_num.size() +
                        remaining_classes.size() ==
                    classes.size());
      TRACE(IDEX, 2,
            "Skipping %zu perf sensitive, ordering %zu by method profiles, and "
            "sorting %zu classes",
            perf_sensitive_classes.size(), classes_with_sort_num.size(),
            remaining_classes.size());
      // Stable sort preserves the original relative order of classes with
      // equal sort numbers.
      std::stable_sort(
          classes_with_sort_num.begin(), classes_with_sort_num.end(),
          [](const DexClassWithSortNum& a, const DexClassWithSortNum& b) {
            return a.second < b.second;
          });
      std::sort(remaining_classes.begin(), remaining_classes.end(),
                compare_dexclasses_for_compressed_size);
      // Rearrange classes so that...
      // - perf_sensitive_classes go first, then
      // - classes_with_sort_num that got ordered by the method profiles, and
      //   finally
      // - remaining_classes
      classes.clear();
      classes.insert(classes.end(), perf_sensitive_classes.begin(),
                     perf_sensitive_classes.end());
      for (auto& p : classes_with_sort_num) {
        classes.push_back(p.first);
      }
      classes.insert(classes.end(), remaining_classes.begin(),
                     remaining_classes.end());
    }
    m_outdex.emplace_back(std::move(classes));
  }

  // Reset per-dex flags unless we are still emitting the respective set.
  if (!m_emitting_scroll_set) {
    dex_info.scroll = false;
  }
  if (!m_emitting_bg_set) {
    dex_info.background = false;
  }
  if (!m_emitting_extended) {
    dex_info.extended = false;
  }

  // This is false by default and set to true everytime
  // a DEX contains classes already ordered by the betamap.
  // This resets the flag as this method advances to the next
  // writable DEX.
  dex_info.betamap_ordered = false;
}
} // namespace interdex