libredex/DexLoader.cpp (507 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "DexLoader.h"
#include "AggregateException.h"
#include "DexAccess.h"
#include "DexCallSite.h"
#include "DexDefs.h"
#include "DexMethodHandle.h"
#include "IRCode.h"
#include "Macros.h"
#include "Show.h"
#include "Trace.h"
#include "Walkers.h"
#include "WorkQueue.h"
#include <exception>
#include <stdexcept>
#include <vector>
DexLoader::DexLoader(const DexLocation* location)
: m_idx(nullptr),
m_file(new boost::iostreams::mapped_file()),
m_location(location) {}
static void validate_dex_header(const dex_header* dh,
size_t dexsize,
int support_dex_version) {
bool supported = false;
switch (support_dex_version) {
case 38:
supported = supported ||
!memcmp(dh->magic, DEX_HEADER_DEXMAGIC_V38, sizeof(dh->magic));
FALLTHROUGH_INTENDED; /* intentional fallthrough to also check for v37 */
case 37:
supported = supported ||
!memcmp(dh->magic, DEX_HEADER_DEXMAGIC_V37, sizeof(dh->magic));
FALLTHROUGH_INTENDED; /* intentional fallthrough to also check for v35 */
case 35:
supported = supported ||
!memcmp(dh->magic, DEX_HEADER_DEXMAGIC_V35, sizeof(dh->magic));
break;
default:
not_reached_log("Unrecognized support_dex_version %d\n",
support_dex_version);
}
always_assert_log(supported, "Bad dex magic %s for support_dex_version %d\n",
dh->magic, support_dex_version);
always_assert_log(
dh->file_size == dexsize,
"Reported size in header (%zu) does not match file size (%u)\n",
dexsize,
dh->file_size);
auto off = (uint64_t)dh->class_defs_off;
auto limit = off + dh->class_defs_size * sizeof(dex_class_def);
always_assert_log(off < dexsize, "class_defs_off out of range");
always_assert_log(limit <= dexsize, "invalid class_defs_size");
}
void DexLoader::gather_input_stats(dex_stats_t* stats, const dex_header* dh) {
if (!stats) {
return;
}
stats->num_types += dh->type_ids_size;
stats->num_classes += dh->class_defs_size;
stats->num_method_refs += dh->method_ids_size;
stats->num_field_refs += dh->field_ids_size;
stats->num_strings += dh->string_ids_size;
stats->num_protos += dh->proto_ids_size;
stats->num_bytes += dh->file_size;
// T58562665: TODO - actually update states for callsites/methodhandles
stats->num_callsites += 0;
stats->num_methodhandles += 0;
std::unordered_set<DexEncodedValueArray, boost::hash<DexEncodedValueArray>>
enc_arrays;
std::set<DexTypeList*, dextypelists_comparator> type_lists;
std::unordered_set<uint32_t> anno_offsets;
for (uint32_t cidx = 0; cidx < dh->class_defs_size; ++cidx) {
auto* clz = m_classes->at(cidx);
if (clz == nullptr) {
// Skip nulls, they may have been introduced by benign duplicate classes
continue;
}
auto* class_def = &m_class_defs[cidx];
auto anno_off = class_def->annotations_off;
if (anno_off) {
const dex_annotations_directory_item* anno_dir =
(const dex_annotations_directory_item*)m_idx->get_uint_data(anno_off);
auto class_anno_off = anno_dir->class_annotations_off;
if (class_anno_off) {
const uint32_t* anno_data = m_idx->get_uint_data(class_anno_off);
uint32_t count = *anno_data++;
for (uint32_t aidx = 0; aidx < count; ++aidx) {
anno_offsets.insert(anno_data[aidx]);
}
}
const uint32_t* anno_data = (uint32_t*)(anno_dir + 1);
for (uint32_t fidx = 0; fidx < anno_dir->fields_size; ++fidx) {
anno_data++;
anno_offsets.insert(*anno_data++);
}
for (uint32_t midx = 0; midx < anno_dir->methods_size; ++midx) {
anno_data++;
anno_offsets.insert(*anno_data++);
}
for (uint32_t pidx = 0; pidx < anno_dir->parameters_size; ++pidx) {
anno_data++;
uint32_t xrefoff = *anno_data++;
if (xrefoff != 0) {
const uint32_t* annoxref = m_idx->get_uint_data(xrefoff);
uint32_t count = *annoxref++;
for (uint32_t j = 0; j < count; j++) {
uint32_t off = annoxref[j];
anno_offsets.insert(off);
}
}
}
}
auto* interfaces_type_list = clz->get_interfaces();
type_lists.insert(interfaces_type_list);
auto deva = clz->get_static_values();
if (deva) {
if (!enc_arrays.count(*deva)) {
enc_arrays.emplace(std::move(*deva));
stats->num_static_values++;
}
}
stats->num_fields += clz->get_ifields().size() + clz->get_sfields().size();
stats->num_methods +=
clz->get_vmethods().size() + clz->get_dmethods().size();
for (auto* meth : clz->get_vmethods()) {
DexCode* code = meth->get_dex_code();
if (code) {
stats->num_instructions += code->get_instructions().size();
}
}
for (auto* meth : clz->get_dmethods()) {
DexCode* code = meth->get_dex_code();
if (code) {
stats->num_instructions += code->get_instructions().size();
}
}
}
for (uint32_t meth_idx = 0; meth_idx < dh->method_ids_size; ++meth_idx) {
auto* meth = m_idx->get_methodidx(meth_idx);
DexProto* proto = meth->get_proto();
type_lists.insert(proto->get_args());
}
stats->num_annotations += anno_offsets.size();
stats->num_type_lists += type_lists.size();
for (uint32_t sidx = 0; sidx < dh->string_ids_size; ++sidx) {
auto str = m_idx->get_stringidx(sidx);
stats->strings_total_size += str->get_entry_size();
}
const dex_map_list* map_list =
reinterpret_cast<const dex_map_list*>(m_file->const_data() + dh->map_off);
bool header_seen = false;
uint32_t header_index = 0;
for (uint32_t i = 0; i < map_list->size; i++) {
const auto& item = map_list->items[i];
const uint8_t* encdata = m_idx->get_uleb_data(item.offset);
const uint8_t* initial_encdata = encdata;
switch (item.type) {
case TYPE_HEADER_ITEM:
always_assert_log(
!header_seen,
"Expected header_item to be unique in the map_list, "
"but encountered one at index i=%u and another at index j=%u.",
header_index,
i);
header_seen = true;
header_index = i;
always_assert_log(1 == item.size,
"Expected count of header_items in the map_list to be "
"exactly 1, but got ct=%u.",
item.size);
stats->header_item_count += item.size;
stats->header_item_bytes += item.size * sizeof(dex_header);
break;
case TYPE_STRING_ID_ITEM:
stats->string_id_count += item.size;
stats->string_id_bytes += item.size * sizeof(dex_string_id);
break;
case TYPE_TYPE_ID_ITEM:
stats->type_id_count += item.size;
stats->type_id_bytes += item.size * sizeof(dex_type_id);
break;
case TYPE_PROTO_ID_ITEM:
stats->proto_id_count += item.size;
stats->proto_id_bytes += item.size * sizeof(dex_proto_id);
break;
case TYPE_FIELD_ID_ITEM:
stats->field_id_count += item.size;
stats->field_id_bytes += item.size * sizeof(dex_field_id);
break;
case TYPE_METHOD_ID_ITEM:
stats->method_id_count += item.size;
stats->method_id_bytes += item.size * sizeof(dex_method_id);
break;
case TYPE_CLASS_DEF_ITEM:
stats->class_def_count += item.size;
stats->class_def_bytes += item.size * sizeof(dex_class_def);
break;
case TYPE_CALL_SITE_ID_ITEM:
stats->call_site_id_count += item.size;
stats->call_site_id_bytes += item.size * sizeof(dex_callsite_id);
break;
case TYPE_METHOD_HANDLE_ITEM:
stats->method_handle_count += item.size;
stats->method_handle_bytes += item.size * sizeof(dex_methodhandle_id);
break;
case TYPE_MAP_LIST:
stats->map_list_count += item.size;
for (uint32_t j = 0; j < item.size; j++) {
encdata = align_ptr(encdata, 4);
uint32_t map_list_entries = *(uint32_t*)(encdata);
stats->map_list_bytes +=
sizeof(uint32_t) + map_list_entries * sizeof(dex_map_item);
}
break;
case TYPE_TYPE_LIST:
stats->type_list_count += item.size;
for (uint32_t j = 0; j < item.size; j++) {
encdata = align_ptr(encdata, 4);
uint32_t type_list_entries = *(uint32_t*)(encdata);
stats->type_list_bytes +=
sizeof(uint32_t) + type_list_entries * sizeof(dex_type_item);
}
break;
case TYPE_ANNOTATION_SET_REF_LIST:
stats->annotation_set_ref_list_count += item.size;
for (uint32_t j = 0; j < item.size; j++) {
encdata = align_ptr(encdata, 4);
uint32_t annotation_set_ref_list_entries = *(uint32_t*)(encdata);
stats->annotation_set_ref_list_bytes +=
sizeof(uint32_t) + annotation_set_ref_list_entries *
sizeof(dex_annotation_set_ref_item);
}
break;
case TYPE_ANNOTATION_SET_ITEM:
stats->annotation_set_count += item.size;
for (uint32_t j = 0; j < item.size; j++) {
encdata = align_ptr(encdata, 4);
uint32_t annotation_set_entries = *(uint32_t*)(encdata);
stats->annotation_set_bytes +=
sizeof(uint32_t) +
annotation_set_entries * sizeof(dex_annotation_off_item);
}
break;
case TYPE_CLASS_DATA_ITEM:
stats->class_data_count += item.size;
for (uint32_t j = 0; j < item.size; j++) {
// Read in field sizes.
uint32_t static_fields_size = read_uleb128(&encdata);
uint32_t instance_fields_size = read_uleb128(&encdata);
uint32_t direct_methods_size = read_uleb128(&encdata);
uint32_t virtual_methods_size = read_uleb128(&encdata);
for (uint32_t k = 0; k < static_fields_size + instance_fields_size;
++k) {
// Read and skip all of the encoded_field data.
read_uleb128(&encdata);
read_uleb128(&encdata);
}
for (uint32_t k = 0; k < direct_methods_size + virtual_methods_size;
++k) {
// Read and skip all of the encoded_method data.
read_uleb128(&encdata);
read_uleb128(&encdata);
read_uleb128(&encdata);
}
}
stats->class_data_bytes += encdata - initial_encdata;
break;
case TYPE_CODE_ITEM:
stats->code_count += item.size;
for (uint32_t j = 0; j < item.size; j++) {
encdata = align_ptr(encdata, 4);
dex_code_item* code_item = (dex_code_item*)encdata;
encdata += sizeof(dex_code_item);
encdata += code_item->insns_size * sizeof(uint16_t);
if (code_item->tries_size != 0 && code_item->insns_size % 2 == 1) {
encdata += sizeof(uint16_t);
}
encdata += code_item->tries_size * sizeof(dex_tries_item);
if (code_item->tries_size != 0) {
uint32_t catch_handler_list_size = read_uleb128(&encdata);
for (uint32_t k = 0; k < catch_handler_list_size; ++k) {
int32_t catch_handler_size = read_sleb128(&encdata);
uint32_t abs_size = (uint32_t)std::abs(catch_handler_size);
for (uint32_t l = 0; l < abs_size; ++l) {
// Read encoded_type_addr_pair.
read_uleb128(&encdata);
read_uleb128(&encdata);
}
// Read catch_all_addr
if (catch_handler_size <= 0) {
read_uleb128(&encdata);
}
}
}
}
stats->code_bytes += encdata - initial_encdata;
break;
case TYPE_STRING_DATA_ITEM:
stats->string_data_count += item.size;
for (uint32_t j = 0; j < item.size; j++) {
// Skip data that encodes the number of UTF-16 code units.
read_uleb128(&encdata);
// Read up to and including the NULL-terminating byte.
while (true) {
const uint8_t byte = *encdata;
encdata++;
if (byte == 0) break;
}
}
stats->string_data_bytes += encdata - initial_encdata;
break;
case TYPE_DEBUG_INFO_ITEM:
stats->num_dbg_items += item.size;
for (uint32_t j = 0; j < item.size; j++) {
// line_start
read_uleb128(&encdata);
// param_count
uint32_t param_count = read_uleb128(&encdata);
while (param_count--) {
// Each parameter is one uleb128p1
read_uleb128p1(&encdata);
}
bool running = true;
while (running) {
uint8_t opcode = *encdata++;
switch (opcode) {
case DBG_END_SEQUENCE:
running = false;
break;
case DBG_ADVANCE_PC:
case DBG_END_LOCAL:
case DBG_RESTART_LOCAL:
// each of these opcodes has one uleb128 arg:
// - addr_diff
// - register_num
// - register_num
read_uleb128(&encdata);
break;
case DBG_ADVANCE_LINE:
// line_diff
read_sleb128(&encdata);
break;
case DBG_START_LOCAL:
// register_num
read_uleb128(&encdata);
// name_idx
read_uleb128p1(&encdata);
// type_idx
read_uleb128p1(&encdata);
break;
case DBG_START_LOCAL_EXTENDED:
// register_num
read_uleb128(&encdata);
// name_idx
read_uleb128p1(&encdata);
// type_idx
read_uleb128p1(&encdata);
// sig_idx
read_uleb128p1(&encdata);
break;
case DBG_SET_FILE:
// name_idx
read_uleb128p1(&encdata);
break;
case DBG_SET_PROLOGUE_END:
case DBG_SET_EPILOGUE_BEGIN:
// These cases have no args
break;
default:
// These are special opcodes. We separate them out to the default
// case to show we're properly interpretting this program.
break;
}
}
}
stats->dbg_total_size += encdata - initial_encdata;
break;
case TYPE_ANNOTATION_ITEM:
// TBD!
break;
case TYPE_ENCODED_ARRAY_ITEM:
// TBD!
break;
case TYPE_ANNOTATIONS_DIR_ITEM:
stats->annotations_directory_count += item.size;
for (uint32_t j = 0; j < item.size; ++j) {
encdata = align_ptr(encdata, 4);
dex_annotations_directory_item* annotations_directory_item =
(dex_annotations_directory_item*)encdata;
encdata += sizeof(dex_annotations_directory_item);
encdata += sizeof(dex_field_annotation) *
annotations_directory_item->fields_size;
encdata += sizeof(dex_method_annotation) *
annotations_directory_item->methods_size;
encdata += sizeof(dex_parameter_annotation) *
annotations_directory_item->parameters_size;
}
stats->annotations_directory_bytes += encdata - initial_encdata;
break;
case TYPE_HIDDENAPI_CLASS_DATA_ITEM:
// No stats gathered.
break;
default:
fprintf(
stderr,
"warning: map_list item at index i=%u is of unknown type T=0x%04hX\n",
i,
item.type);
}
}
}
void DexLoader::load_dex_class(int num) {
const dex_class_def* cdef = m_class_defs + num;
DexClass* dc = DexClass::create(m_idx.get(), cdef, m_location);
// We may be inserting a nullptr here. Need to remove them later
//
// We're inserting nullptr because we can't mess up the indices of the other
// classes in the vector. This vector is used via random access.
m_classes->at(num) = dc;
}
const dex_header* DexLoader::get_dex_header(const char* file_name) {
m_file->open(file_name, boost::iostreams::mapped_file::readonly);
if (!m_file->is_open()) {
fprintf(stderr, "error: cannot create memory-mapped file: %s\n", file_name);
exit(EXIT_FAILURE);
}
return reinterpret_cast<const dex_header*>(m_file->const_data());
}
DexClasses DexLoader::load_dex(const char* file_name,
dex_stats_t* stats,
int support_dex_version) {
const dex_header* dh = get_dex_header(file_name);
validate_dex_header(dh, m_file->size(), support_dex_version);
return load_dex(dh, stats);
}
DexClasses DexLoader::load_dex(const dex_header* dh, dex_stats_t* stats) {
if (dh->class_defs_size == 0) {
return DexClasses(0);
}
m_idx = std::make_unique<DexIdx>(dh);
auto off = (uint64_t)dh->class_defs_off;
m_class_defs =
reinterpret_cast<const dex_class_def*>((const uint8_t*)dh + off);
DexClasses classes(dh->class_defs_size);
m_classes = &classes;
{
auto num_threads = redex_parallel::default_num_threads();
std::vector<std::vector<std::exception_ptr>> exceptions_vec(num_threads);
std::vector<size_t> indices(dh->class_defs_size);
std::iota(indices.begin(), indices.end(), 0);
workqueue_run<size_t>(
[&exceptions_vec, this](sparta::SpartaWorkerState<size_t>* state,
size_t num) {
try {
load_dex_class(num);
} catch (const std::exception& exc) {
TRACE(MAIN, 1, "Worker throw the exception:%s", exc.what());
exceptions_vec[state->worker_id()].emplace_back(
std::current_exception());
}
},
indices,
num_threads);
std::vector<std::exception_ptr> all_exceptions;
for (auto& exceptions : exceptions_vec) {
all_exceptions.insert(all_exceptions.end(), exceptions.begin(),
exceptions.end());
}
if (!all_exceptions.empty()) {
// At least one of the workers raised an exception
aggregate_exception ae(all_exceptions);
throw ae;
}
}
gather_input_stats(stats, dh);
// Remove nulls from the classes list. They may have been introduced by benign
// duplicate classes.
classes.erase(std::remove(classes.begin(), classes.end(), nullptr),
classes.end());
return classes;
}
static void balloon_all(const Scope& scope, bool throw_on_error) {
ConcurrentMap<DexMethod*, std::string> ir_balloon_errors;
walk::parallel::methods(scope, [&](DexMethod* m) {
if (m->get_dex_code()) {
try {
m->balloon();
} catch (RedexException& re) {
ir_balloon_errors.emplace(m, re.what());
}
}
});
if (!ir_balloon_errors.empty()) {
std::ostringstream oss;
oss << "Error lifting DexCode to IRCode for the following methods:"
<< std::endl;
for (const auto& [method, msg] : ir_balloon_errors) {
oss << show(method) << ": " << msg << std::endl;
}
always_assert_log(!throw_on_error,
"%s" /* format string must be a string literal */,
oss.str().c_str());
TRACE(MAIN, 1, "%s" /* format string must be a string literal */,
oss.str().c_str());
}
}
DexClasses load_classes_from_dex(const DexLocation* location,
bool balloon,
bool throw_on_balloon_error,
int support_dex_version) {
dex_stats_t stats;
return load_classes_from_dex(location, &stats, balloon,
throw_on_balloon_error, support_dex_version);
}
DexClasses load_classes_from_dex(const DexLocation* location,
dex_stats_t* stats,
bool balloon,
bool throw_on_balloon_error,
int support_dex_version) {
TRACE(MAIN, 1, "Loading classes from dex from %s",
location->get_file_name().c_str());
DexLoader dl(location);
auto classes = dl.load_dex(location->get_file_name().c_str(), stats,
support_dex_version);
if (balloon) {
balloon_all(classes, throw_on_balloon_error);
}
return classes;
}
DexClasses load_classes_from_dex(const dex_header* dh,
const DexLocation* location,
bool balloon,
bool throw_on_balloon_error) {
DexLoader dl(location);
auto classes = dl.load_dex(dh, nullptr);
if (balloon) {
balloon_all(classes, throw_on_balloon_error);
}
return classes;
}
std::string load_dex_magic_from_dex(const DexLocation* location) {
DexLoader dl(location);
auto dh = dl.get_dex_header(location->get_file_name().c_str());
return dh->magic;
}
void balloon_for_test(const Scope& scope) { balloon_all(scope, true); }