tools/oatmeal/dex.cpp (642 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include "dex.h" #include "DexDefs.h" #include "DexEncoding.h" #include "DexOpcodeDefs.h" #include "OatmealUtil.h" #include "QuickData.h" #include "mmap.h" #include <sys/mman.h> #include <sys/stat.h> #include <sys/types.h> #include <memory> #include <unordered_map> #define WRITE16_TO_BUFFER(buffer, to_write, file_ptr) \ buffer << to_write; \ file_ptr += 2; namespace { using InsnWalkerFn = const std::function<void(DexOpcode, const uint16_t* const ptr)>&; using CodeItemWalkerFn = const std::function<void(const uint8_t* const code_item)>&; void make_instruction(const uint16_t** insns_ptr, const QuickData* quick_data, const std::string* dex, size_t& file_ptr, WritableBuffer& out_buffer) { auto& insns = *insns_ptr; auto fopcode = static_cast<DexOpcode>(*insns++); DexOpcode opcode = static_cast<DexOpcode>(fopcode & 0xff); // clang-format off #ifdef DEBUG_LOG printf("Processing FOPCODE::OPCODE: %04x :: %02x :: %s\n", fopcode, opcode, print(opcode).c_str()); #endif switch (opcode) { case DOPCODE_NOP: { #ifdef DEBUG_LOG printf("Processing FOPCODE: %s\n", print(fopcode).c_str()); #endif if (fopcode == FOPCODE_PACKED_SWITCH) { size_t count = (*insns--) * 2 + 4; for (size_t i = 0; i < count; i++) { WRITE16_TO_BUFFER(out_buffer, insns, file_ptr) insns++; } return; } else if (fopcode == FOPCODE_SPARSE_SWITCH) { size_t count = (*insns--) * 4 + 2; for (size_t i = 0; i < count; i++) { WRITE16_TO_BUFFER(out_buffer, insns, file_ptr) insns++; } return; } else if (fopcode == FOPCODE_FILLED_ARRAY) { uint16_t ewidth = *insns++; uint32_t size = *((uint32_t*)insns); size_t count = (ewidth * size + 1) / 2 + 4; insns -= 2; for (size_t i = 0; i < count; i++) { WRITE16_TO_BUFFER(out_buffer, insns, file_ptr) insns++; } return; } } /* fall through for NOP */ SWITCH_FORMAT_10 { WRITE16_TO_BUFFER(out_buffer, fopcode, file_ptr) #ifdef DEBUG_LOG printf("Writing OPCODE: %02x :: %s\n", opcode, print(opcode).c_str()); #endif break; } SWITCH_FORMAT_20 { WRITE16_TO_BUFFER(out_buffer, fopcode, file_ptr) uint16_t arg = *insns++; WRITE16_TO_BUFFER(out_buffer, arg, file_ptr) #ifdef DEBUG_LOG printf("Writing OPCODE: %02x %02x :: %s\n", opcode, arg, print(opcode).c_str()); #endif break; } SWITCH_FORMAT_30 { WRITE16_TO_BUFFER(out_buffer, fopcode, file_ptr) uint16_t arg_low = *insns++; WRITE16_TO_BUFFER(out_buffer, arg_low, file_ptr) uint16_t arg_high = *insns++; WRITE16_TO_BUFFER(out_buffer, arg_high, file_ptr) #ifdef DEBUG_LOG printf("Writing OPCODE: %02x %02x%02x :: %s\n", opcode, arg_low, arg_high, print(opcode).c_str()); #endif break; } SWITCH_FORMAT_50 { WRITE16_TO_BUFFER(out_buffer, fopcode, file_ptr) uint16_t arg_0 = *insns++; WRITE16_TO_BUFFER(out_buffer, arg_0, file_ptr) uint16_t arg_1 = *insns++; WRITE16_TO_BUFFER(out_buffer, arg_1, file_ptr) uint16_t arg_2 = *insns++; WRITE16_TO_BUFFER(out_buffer, arg_2, file_ptr) uint16_t arg_3 = *insns++; WRITE16_TO_BUFFER(out_buffer, arg_3, file_ptr) #ifdef DEBUG_LOG printf("Writing OPCODE: %02x %02x%02x%02x%02x :: %s\n", opcode, arg_0, arg_1, arg_2, arg_3, print(opcode).c_str()); #endif break; // return new DexInstruction(insns - 5, 4); } SWITCH_FORMAT_REGULAR_FIELD_REF { uint16_t fidx = *insns++; uint16_t quick_fopcode = fopcode; uint16_t quick_arg = fidx; uint16_t quick_data_off = quick_data->get_field_offset(*dex, fidx); if (quick_data_off > 0) { quick_fopcode = (fopcode & 0xff00) | (quicken(opcode) & 0x00ff); quick_arg = quick_data_off; #ifdef DEBUG_LOG printf("QUICKEN: [%s] %s :: %02x->%02x :: %02x->%02x\n", (*dex).c_str(), print(opcode).c_str(), fopcode, quick_fopcode, fidx, quick_arg); #endif } else { #ifdef DEBUG_LOG printf("No quick mapping for: [%s]:%u\n", (*dex).c_str(), fidx); #endif } WRITE16_TO_BUFFER(out_buffer, quick_fopcode, file_ptr) WRITE16_TO_BUFFER(out_buffer, quick_arg, file_ptr) #ifdef DEBUG_LOG printf("Writing OPCODE: %02x %02x :: %s\n", quick_fopcode, quick_arg, print(opcode).c_str()); #endif break; } SWITCH_FORMAT_REGULAR_METHOD_REF { WRITE16_TO_BUFFER(out_buffer, fopcode, file_ptr) uint16_t midx = *insns++; uint16_t arg = *insns++; WRITE16_TO_BUFFER(out_buffer, midx, file_ptr) WRITE16_TO_BUFFER(out_buffer, arg, file_ptr) #ifdef DEBUG_LOG printf("Writing OPCODE: %02x %02x %02x :: %s\n", fopcode, midx, arg, print(opcode).c_str()); #endif break; } SWITCH_FORMAT_CONST_STRING { WRITE16_TO_BUFFER(out_buffer, fopcode, file_ptr) uint16_t sidx = *insns++; WRITE16_TO_BUFFER(out_buffer, sidx, file_ptr) #ifdef DEBUG_LOG printf("Writing OPCODE: %02x %02x :: %s\n", fopcode, sidx, print(opcode).c_str()); #endif break; } SWITCH_FORMAT_CONST_STRING_JUMBO { WRITE16_TO_BUFFER(out_buffer, fopcode, file_ptr) uint16_t sidx_partial_low = *insns++; uint16_t sidx_partial_high = *insns++; WRITE16_TO_BUFFER(out_buffer, sidx_partial_low, file_ptr) WRITE16_TO_BUFFER(out_buffer, sidx_partial_high, file_ptr) #ifdef DEBUG_LOG uint32_t sidx = sidx_partial_high << 16 | sidx_partial_low; printf("Writing OPCODE: %02x %04x :: %s\n", fopcode, sidx, print(opcode).c_str()); #endif break; } SWITCH_FORMAT_TYPE_REF { WRITE16_TO_BUFFER(out_buffer, fopcode, file_ptr) uint16_t tidx = *insns++; WRITE16_TO_BUFFER(out_buffer, tidx, file_ptr) #ifdef DEBUG_LOG printf("Writing OPCODE: %02x %02x :: %s\n", fopcode, tidx, print(opcode).c_str()); #endif break; } SWITCH_FORMAT_FILL_ARRAY { WRITE16_TO_BUFFER(out_buffer, fopcode, file_ptr) uint16_t tidx = *insns++; uint16_t arg = *insns++; WRITE16_TO_BUFFER(out_buffer, tidx, file_ptr) WRITE16_TO_BUFFER(out_buffer, arg, file_ptr) #ifdef DEBUG_LOG printf("Writing OPCODE: %02x %02x :: %s\n", fopcode, tidx, print(opcode).c_str()); #endif break; } default: fprintf(stderr, "Unknown opcode %02x\n", opcode); // return nullptr; } // clang-format on } /* * See class_data_item in Dex spec. */ void load_class_data_item( const uint8_t* class_data_item, std::unordered_map<uint32_t, uint32_t>& code_item_offset) { const uint8_t* encd = class_data_item; uint32_t sfield_count = read_uleb128(&encd); uint32_t ifield_count = read_uleb128(&encd); uint32_t dmethod_count = read_uleb128(&encd); uint32_t vmethod_count = read_uleb128(&encd); uint32_t ndex = 0; for (uint32_t i = 0; i < sfield_count; i++) { ndex += read_uleb128(&encd); // field_idx_diff read_uleb128(&encd); // access_flags } ndex = 0; for (uint32_t i = 0; i < ifield_count; i++) { ndex += read_uleb128(&encd); // field_idx_diff read_uleb128(&encd); // access_flags } ndex = 0; for (uint32_t i = 0; i < dmethod_count; i++) { ndex += read_uleb128(&encd); read_uleb128(&encd); // access_flags uint32_t code_off = read_uleb128(&encd); code_item_offset[code_off] = 0; } ndex = 0; for (uint32_t i = 0; i < vmethod_count; i++) { ndex += read_uleb128(&encd); read_uleb128(&encd); // access_flags uint32_t code_off = read_uleb128(&encd); code_item_offset[code_off] = 0; } (void)ndex; } /* * See code_item in Dex spec. */ void load_code_item(uint8_t* const code_item, const QuickData* quick_data, const std::string* dex, size_t& file_ptr, WritableBuffer& out_buffer) { const dex_code_item* code = reinterpret_cast<const dex_code_item*>(code_item); uint8_t* const dex_code_item_end = reinterpret_cast<uint8_t* const>(const_cast<dex_code_item*>(code + 1)); #ifdef DEBUG_LOG printf("method: %p, %u, %u, %u\n", (void*)code_item, code->registers_size, code->ins_size, code->outs_size); #endif uint8_t* out_ptr = code_item; while (out_ptr < dex_code_item_end) { out_buffer << reinterpret_cast<char*>(out_ptr++); file_ptr++; } file_ptr--; uint32_t tries = code->tries_size; const uint16_t* cdata = reinterpret_cast<const uint16_t*>(dex_code_item_end); if (code->insns_size) { const uint16_t* const end = cdata + code->insns_size; while (cdata < end) { make_instruction(&cdata, quick_data, dex, file_ptr, out_buffer); } /* * Padding, see dex-spec. * Per my memory, there are dex-files where the padding is * implemented not according to spec. Just FYI in case * something weird happens in the future. */ if (code->insns_size & 1 && tries) cdata++; } } void process_instruction(const uint16_t** insns_ptr, InsnWalkerFn walker) { auto& insns = *insns_ptr; auto fopcode = static_cast<DexOpcode>(*insns); DexOpcode opcode = static_cast<DexOpcode>(fopcode & 0xff); // clang-format off #ifdef DEBUG_LOG printf("Processing FOPCODE::OPCODE: %04x :: %02x :: %s\n", fopcode, opcode, print(opcode).c_str()); #endif switch (opcode) { case DOPCODE_NOP: { #ifdef DEBUG_LOG printf("Processing FOPCODE: %s\n", print(fopcode).c_str()); #endif if (fopcode == FOPCODE_PACKED_SWITCH) { size_t count = (*(insns + 1)) * 2 + 4; for (size_t i = 0; i < count; i++) { insns++; } return; } else if (fopcode == FOPCODE_SPARSE_SWITCH) { size_t count = (*(insns + 1)) * 4 + 2; for (size_t i = 0; i < count; i++) { insns++; } return; } else if (fopcode == FOPCODE_FILLED_ARRAY) { uint16_t ewidth = *(insns + 1); uint32_t size = *(reinterpret_cast<const uint32_t*>(insns + 2)); size_t count = (ewidth * size + 1) / 2 + 4; for (size_t i = 0; i < count; i++) { insns++; } return; } } SWITCH_FORMAT_10 SWITCH_FORMAT_RETURN_VOID_NO_BARRIER { walker(opcode, insns++); #ifdef DEBUG_LOG printf("Walking OPCODE: %02x :: %s\n", opcode, print(opcode).c_str()); #endif break; } SWITCH_FORMAT_20 { walker(opcode, insns++); #ifdef DEBUG_LOG uint16_t arg = *insns; printf("Walking OPCODE: %02x %02x :: %s\n", opcode, arg, print(opcode).c_str()); #endif insns++; break; } SWITCH_FORMAT_30 { walker(opcode, insns++); #ifdef DEBUG_LOG uint16_t arg_low = *insns; uint16_t arg_high = *(insns+1); printf("Walking OPCODE: %02x %02x%02x :: %s\n", opcode, arg_low, arg_high, print(opcode).c_str()); #endif insns+=2; break; } SWITCH_FORMAT_50 { walker(opcode, insns++); #ifdef DEBUG_LOG uint16_t arg_0 = *insns; uint16_t arg_1 = *(insns+1); uint16_t arg_2 = *(insns+2); uint16_t arg_3 = *(insns+3); printf("Walking OPCODE: %02x %02x%02x%02x%02x :: %s\n", opcode, arg_0, arg_1, arg_2, arg_3, print(opcode).c_str()); #endif insns+=4; break; } SWITCH_FORMAT_REGULAR_FIELD_REF SWITCH_FORMAT_QUICK_FIELD_REF { walker(opcode, insns++); #ifdef DEBUG_LOG uint16_t fidx = *insns; printf("Walking OPCODE: %02x %02x :: %s\n", fopcode, fidx, print(opcode).c_str()); #endif insns++; break; } SWITCH_FORMAT_REGULAR_METHOD_REF SWITCH_FORMAT_QUICK_METHOD_REF { walker(opcode, insns++); #ifdef DEBUG_LOG uint16_t midx = *insns; uint16_t arg = *(insns+1); printf("Walking OPCODE: %02x %02x %02x :: %s\n", fopcode, midx, arg, print(opcode).c_str()); #endif insns+=2; break; } SWITCH_FORMAT_CONST_STRING { walker(opcode, insns++); #ifdef DEBUG_LOG uint16_t sidx = *insns; printf("Walking OPCODE: %02x %02x :: %s\n", fopcode, sidx, print(opcode).c_str()); #endif insns++; break; } SWITCH_FORMAT_CONST_STRING_JUMBO { walker(opcode, insns++); #ifdef DEBUG_LOG uint16_t sidx_partial_low = *insns; uint16_t sidx_partial_high = *(insns+1); uint32_t sidx = sidx_partial_high << 16 | sidx_partial_low; printf("Walking OPCODE: %02x %04x :: %s\n", fopcode, sidx, print(opcode).c_str()); #endif insns+=2; break; } SWITCH_FORMAT_TYPE_REF { walker(opcode, insns++); #ifdef DEBUG_LOG uint16_t tidx = *insns; printf("Walking OPCODE: %02x %02x :: %s\n", fopcode, tidx, print(opcode).c_str()); #endif insns++; break; } SWITCH_FORMAT_FILL_ARRAY { walker(opcode, insns++); #ifdef DEBUG_LOG uint16_t tidx = *insns; uint16_t arg = *(insns+1); printf("Walking OPCODE: %02x %02x %02x :: %s\n", fopcode, tidx, arg, print(opcode).c_str()); #endif insns+=2; break; } default: fprintf(stderr, "Unknown opcode %02x\n", opcode); // return nullptr; } // clang-format on } /* * See code_item in Dex spec. */ void process_code_item(const uint8_t* code_item, InsnWalkerFn walker) { const dex_code_item* code = reinterpret_cast<const dex_code_item*>(code_item); uint8_t* const dex_code_item_end = reinterpret_cast<uint8_t* const>(const_cast<dex_code_item*>(code + 1)); #ifdef DEBUG_LOG printf("method: %p, %u, %u, %u\n", (void*)code_item, code->registers_size, code->ins_size, code->outs_size); #endif const uint16_t* cdata = reinterpret_cast<const uint16_t*>(dex_code_item_end); if (code->insns_size) { const uint16_t* const end = cdata + code->insns_size; while (cdata < end) { process_instruction(&cdata, walker); } } } } // Anonymous namespace void quicken_dex(const char* location, const QuickData* quick_data, FileHandle& out) { FILE* fd = fopen(location, "r"); std::string error_msg; CHECK(location != nullptr); std::unique_ptr<MappedFile> map; { START_TRACE() struct stat sbuf; memset(&sbuf, 0, sizeof(sbuf)); if (fstat(fileno(fd), &sbuf) == -1) { fprintf(stderr, "DexFile: fstat '%s' failed\n", location); exit(1); } if (S_ISDIR(sbuf.st_mode)) { fprintf(stderr, "Attempt to mmap directory '%s'\n", location); exit(1); } size_t length = sbuf.st_size; map.reset(MappedFile::mmap_file(length, PROT_READ, MAP_PRIVATE, fileno(fd), location, &error_msg)); if (map == nullptr) { CHECK(!error_msg.empty()); return; } END_TRACE("mmap") } if (map->size() < sizeof(DexFileHeader)) { fprintf(stderr, "DexFile: failed to open dex file '%s' that is too short to have a " "header\n", location); exit(1); } #ifdef DEBUG_LOG printf("Success: mmap() of file '%s'\n", location); #endif auto dh = reinterpret_cast<const dex_header*>(map->begin()); auto class_defs_off = dh->class_defs_off; std::unordered_map<uint32_t, uint32_t> class_data_offset; std::unordered_map<uint32_t, uint32_t> code_item_offset; { constexpr uint32_t kBufSize = 0x80000; // 1MB output buffer std::unique_ptr<char[]> buf(new char[kBufSize]); WritableBuffer out_buffer(out, buf.get(), kBufSize); std::string canary_name; for (size_t i = 0; i < map->size(); i++) { if (i >= class_defs_off && i < class_defs_off + dh->class_defs_size * sizeof(dex_class_def) && (i - class_defs_off) % sizeof(dex_class_def) == 0) { const dex_class_def* cdef = reinterpret_cast<const dex_class_def*>(map->begin() + i); const uint32_t* class_desc = reinterpret_cast<const uint32_t*>( map->begin() + dh->type_ids_off + cdef->typeidx * sizeof(type_id_item)); const uint32_t* class_string_desc = reinterpret_cast<const uint32_t*>( map->begin() + dh->string_ids_off + (*class_desc) * sizeof(string_id_item)); std::string class_name = read_string(reinterpret_cast<const uint8_t*>( map->begin() + (*class_string_desc))); #ifdef DEBUG_LOG printf("==================\n"); printf("Class begins at %p\n", (void*)cdef); printf("Class data offset: %u\n", cdef->class_data_offset); printf("Class: %s\n", class_name.c_str()); #endif std::size_t found = class_name.find("Canary"); if (found != std::string::npos) { #ifdef DEBUG_LOG printf("Found Canary Class: %s\n", class_name.c_str()); #endif canary_name = std::move(class_name); } if (cdef->class_data_offset) { load_class_data_item(map->begin() + cdef->class_data_offset, code_item_offset); } } if (code_item_offset.count(i) != 0) { #ifdef DEBUG_LOG printf("==================\n"); printf("Code item offset: %zu\n", i); #endif load_code_item(map->begin() + i, quick_data, &canary_name, i, out_buffer); } else { out_buffer << reinterpret_cast<char*>(map->begin() + i); } } } } void print_dex_opcodes(const uint8_t* begin, const size_t size) { stream::stream_dex( begin, size, [](DexOpcode opcode, const uint16_t* const insn) { // clang-format off switch (opcode) { case DOPCODE_NOP: SWITCH_FORMAT_10 SWITCH_FORMAT_RETURN_VOID_NO_BARRIER { printf("OPCODE: %02x :: %s :: %04x\n", opcode, ::print(opcode).c_str(), *insn); break; } SWITCH_FORMAT_20 SWITCH_FORMAT_REGULAR_FIELD_REF SWITCH_FORMAT_QUICK_FIELD_REF SWITCH_FORMAT_CONST_STRING SWITCH_FORMAT_TYPE_REF { printf("OPCODE: %02x :: %s :: %04x%04x\n", opcode, ::print(opcode).c_str(), *insn, *(insn + 1)); break; } SWITCH_FORMAT_30 SWITCH_FORMAT_REGULAR_METHOD_REF SWITCH_FORMAT_QUICK_METHOD_REF SWITCH_FORMAT_CONST_STRING_JUMBO SWITCH_FORMAT_FILL_ARRAY { printf("OPCODE: %02x :: %s :: %04x%04x%04x\n", opcode, ::print(opcode).c_str(), *insn, *(insn + 1), *(insn + 2)); break; } SWITCH_FORMAT_50 { printf("OPCODE: %02x :: %s :: %04x%04x%04x%04x%04x\n", opcode, ::print(opcode).c_str(), *insn, *(insn + 1), *(insn + 2), *(insn + 3), *(insn + 4)); break; } default: { fprintf(stderr, "Unknown opcode %02x\n", opcode); } } // clang-format on }, [](const uint8_t* const insn) {}); } void stream::stream_dex(const uint8_t* begin, const size_t size, InsnWalkerFn insn_walker, CodeItemWalkerFn code_item_walker) { auto dh = reinterpret_cast<const dex_header*>(begin); auto class_defs_off = dh->class_defs_off; std::unordered_map<uint32_t, uint32_t> code_item_offset; { std::string canary_name; for (size_t i = 0; i < size; i++) { if (i >= class_defs_off && i < class_defs_off + dh->class_defs_size * sizeof(dex_class_def) && (i - class_defs_off) % sizeof(dex_class_def) == 0) { const dex_class_def* cdef = reinterpret_cast<const dex_class_def*>(begin + i); const uint32_t* class_desc = reinterpret_cast<const uint32_t*>( begin + dh->type_ids_off + cdef->typeidx * sizeof(type_id_item)); const uint32_t* class_string_desc = reinterpret_cast<const uint32_t*>( begin + dh->string_ids_off + (*class_desc) * sizeof(string_id_item)); std::string class_name = read_string( reinterpret_cast<const uint8_t*>(begin + (*class_string_desc))); #ifdef DEBUG_LOG printf("==================\n"); printf("Class begins at %p\n", (void*)cdef); printf("Class data offset: %u\n", cdef->class_data_offset); printf("Class: %s\n", class_name.c_str()); #endif std::size_t found = class_name.find("Canary"); if (found != std::string::npos) { #ifdef DEBUG_LOG printf("Found Canary Class: %s\n", class_name.c_str()); #endif canary_name = std::move(class_name); } load_class_data_item(begin + cdef->class_data_offset, code_item_offset); } if (code_item_offset.count(i) != 0) { #ifdef DEBUG_LOG printf("==================\n"); printf("Code item offset: %zu\n", i); #endif code_item_walker(begin + i); process_code_item(begin + i, insn_walker); } } } }