tools/python/dex.py (4,085 lines of code) (raw):
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import bisect
import copy
import enum
import io
import numbers
import operator
import optparse
import os
import re
import string
import sys
import zipfile
from io import BytesIO
import file_extract
from file_extract import AutoParser
def get_uleb128_byte_size(value):
    """Return how many bytes ``value`` occupies when encoded as ULEB128.

    Every encoded byte carries seven payload bits, so one byte is counted
    up front and one more for each further 7-bit group. Any value below
    0x80 (negative values included) reports a size of 1.
    """
    size = 1
    remaining = value
    while remaining >= 0x80:
        remaining >>= 7
        size += 1
    return size
def get_uleb128p1_byte_size(value):
    """Return the ULEB128 byte size of ``value + 1``.

    The "p1" encoding stores the value incremented by one, so the size is
    that of the incremented number.
    """
    stored = value + 1
    return get_uleb128_byte_size(stored)
# ----------------------------------------------------------------------
# Constants
# ----------------------------------------------------------------------
# Byte string the "magic" field of the DEX header is validated against.
MAGIC = b"dex\n"

# The two values of the header's "endian_tag" field that get a label when
# the header is dumped.
ENDIAN_CONSTANT = 0x12345678
REVERSE_ENDIAN_CONSTANT = 0x78563412

# All-ones 32-bit value.
NO_INDEX = 0xFFFFFFFF

# Inclusive bounds of two's-complement signed integers, by bit width.
INT4_MIN, INT4_MAX = -(1 << 3), (1 << 3) - 1
INT8_MIN, INT8_MAX = -(1 << 7), (1 << 7) - 1
INT16_MIN, INT16_MAX = -(1 << 15), (1 << 15) - 1
INT24_MIN, INT24_MAX = -(1 << 23), (1 << 23) - 1
INT32_MIN, INT32_MAX = -(1 << 31), (1 << 31) - 1

# Largest unsigned integer, by bit width.
UINT4_MAX = (1 << 4) - 1
UINT8_MAX = (1 << 8) - 1
UINT16_MAX = (1 << 16) - 1
UINT32_MAX = (1 << 32) - 1
class AccessFlags(enum.IntFlag):
    """Bit flags describing the access properties of classes and members.

    Two pairs of names share a bit: VOLATILE/BRIDGE (0x40) and
    TRANSIENT/VARARGS (0x80). The second name of each pair is an alias.
    """

    PUBLIC = 0x1
    PRIVATE = 0x2
    PROTECTED = 0x4
    STATIC = 0x8
    FINAL = 0x10
    SYNCHRONIZED = 0x20
    VOLATILE = 0x40
    BRIDGE = 0x40
    TRANSIENT = 0x80
    VARARGS = 0x80
    NATIVE = 0x100
    INTERFACE = 0x200
    ABSTRACT = 0x400
    STRICT = 0x800
    SYNTHETIC = 0x1000
    ANNOTATION = 0x2000
    ENUM = 0x4000
    CONSTRUCTOR = 0x10000
    DECLARED_SYNCHRONIZED = 0x20000

    @classmethod
    def _missing_(cls, value):
        """Resolve a lookup value that is not a known member.

        When ``value`` is a ``file_extract.FileExtract`` the flags are read
        from it as one unsigned byte; anything else is left to the base
        class.
        """
        if not isinstance(value, file_extract.FileExtract):
            return super()._missing_(value)
        return cls(value.get_uint8())

    def __str__(self):
        """Return the names of all set flags joined with ", "."""
        set_names = [flag.name for flag in AccessFlags if flag & self.value]
        return ", ".join(set_names)
class ValueFormat(enum.Enum):
    """Type tags used in the low five bits of an encoded value's header byte."""

    BYTE = 0x00
    SHORT = 0x02
    CHAR = 0x03
    INT = 0x04
    LONG = 0x06
    FLOAT = 0x10
    DOUBLE = 0x11
    METHOD_TYPE = 0x15
    METHOD_HANDLE = 0x16
    STRING = 0x17
    TYPE = 0x18
    FIELD = 0x19
    METHOD = 0x1A
    ENUM = 0x1B
    ARRAY = 0x1C
    ANNOTATION = 0x1D
    NULL = 0x1E
    BOOLEAN = 0x1F

    @classmethod
    def _missing_(cls, value):
        """Resolve a lookup value that is not a known member.

        A ``file_extract.FileExtract`` is consumed as an unsigned 16-bit
        value which is then looked up; any other value is deferred to the
        base class.
        """
        if not isinstance(value, file_extract.FileExtract):
            return super()._missing_(value)
        return cls(value.get_uint16())
class TypeCode(enum.Enum):
    """Item type codes used by entries of the DEX map list."""

    HEADER_ITEM = 0x0000
    STRING_ID_ITEM = 0x0001
    TYPE_ID_ITEM = 0x0002
    PROTO_ID_ITEM = 0x0003
    FIELD_ID_ITEM = 0x0004
    METHOD_ID_ITEM = 0x0005
    CLASS_DEF_ITEM = 0x0006
    CALL_SITE_ID_ITEM = 0x0007
    METHOD_HANDLE_ITEM = 0x0008
    MAP_LIST = 0x1000
    TYPE_LIST = 0x1001
    ANNOTATION_SET_REF_LIST = 0x1002
    ANNOTATION_SET_ITEM = 0x1003
    CLASS_DATA_ITEM = 0x2000
    CODE_ITEM = 0x2001
    STRING_DATA_ITEM = 0x2002
    DEBUG_INFO_ITEM = 0x2003
    ANNOTATION_ITEM = 0x2004
    ENCODED_ARRAY_ITEM = 0x2005
    ANNOTATIONS_DIRECTORY_ITEM = 0x2006

    @classmethod
    def _missing_(cls, value):
        """Resolve a lookup value that is not a known member.

        A ``file_extract.FileExtract`` is consumed as an unsigned 16-bit
        value which is then looked up; any other value is deferred to the
        base class.
        """
        if not isinstance(value, file_extract.FileExtract):
            return super()._missing_(value)
        return cls(value.get_uint16())

    def dump(self, prefix=None, f=sys.stdout, print_name=True, parent_path=None):
        """Write this member's name to ``f``; the other arguments are unused."""
        f.write(self.name)

    @staticmethod
    def max_width():
        """Return the longest ``bin()`` rendering length among member values.

        NOTE(review): ``dump`` writes member *names*, while this width is
        derived from the binary text of the *values* - confirm that is the
        intended column width.
        """
        widths = [len(bin(code.value)) for code in TypeCode]
        return max(widths)
class MethodHandleTypeCode(enum.Enum):
    """Kinds of method handle stored in a ``method_handle_item``.

    The DEX format defines nine kinds (0x00-0x08). All of them are listed
    so that files using the constructor, direct and interface invocation
    kinds can be parsed instead of failing with ``ValueError``.
    """

    STATIC_PUT = 0x00
    STATIC_GET = 0x01
    INSTANCE_PUT = 0x02
    INSTANCE_GET = 0x03
    INVOKE_STATIC = 0x04
    INVOKE_INSTANCE = 0x05
    INVOKE_CONSTRUCTOR = 0x06
    INVOKE_DIRECT = 0x07
    INVOKE_INTERFACE = 0x08

    @classmethod
    def _missing_(cls, value):
        """Resolve a lookup value that is not a known member.

        A ``file_extract.FileExtract`` is consumed as an unsigned 16-bit
        value which is then looked up; any other value is deferred to the
        base class (which reports it as invalid).
        """
        if isinstance(value, file_extract.FileExtract):
            return cls(value.get_uint16())
        return super()._missing_(value)
# Characters that escape() passes through unchanged.
PRINTABLE = string.ascii_letters + string.digits + string.punctuation + " "


def escape(c):
    """Return a printable rendering of the single character ``c``.

    Characters in ``PRINTABLE`` are returned unchanged. Anything else is
    rendered as a backslash escape sized by its code point: ``\\xNN`` up to
    0xFF, ``\\uNNNN`` up to 0xFFFF and ``\\UNNNNNNNN`` above that.

    Args:
        c: A one-character string.

    Returns:
        The character itself or its escape sequence.
    """
    if c in PRINTABLE:
        return c
    code_point = ord(c)
    if code_point <= 0xFF:
        return "\\x" + "%02.2x" % (code_point)
    # Compare against an integer bound: the code point is an int here, and
    # comparing it with a str raises TypeError on Python 3.
    if code_point <= 0xFFFF:
        return "\\u" + "%04.4x" % (code_point)
    return "\\U" + "%08.8x" % (code_point)
def print_string(s, f):
    """Write ``s`` to ``f`` in double quotes with every character escaped."""
    quote = '"'
    f.write(quote)
    f.write("".join([escape(ch) for ch in s]))
    f.write(quote)
def print_version(version, f):
    """Write a three-element ``version`` to ``f`` as "a.b.c" plus newline.

    Nothing is written when ``version`` does not have exactly three items.
    """
    if len(version) != 3:
        return
    first, second, third = version[0], version[1], version[2]
    f.write("%u.%u.%u\n" % (first, second, third))
def print_hex_bytes(data, f):
    """Write each item of ``data`` as two lowercase hex digits, then a newline."""
    hex_text = "".join(["%2.2x" % (octet) for octet in data])
    f.write(hex_text)
    f.write("\n")
def print_endian(value, f):
    """Write ``value`` in hex to ``f``, labelled when it is a known endian tag.

    The line is always terminated with a newline.
    """
    f.write("%#8.8x" % (value))
    labels = {
        ENDIAN_CONSTANT: " (ENDIAN_CONSTANT)",
        REVERSE_ENDIAN_CONSTANT: " (REVERSE_ENDIAN_CONSTANT)",
    }
    label = labels.get(value)
    if label is not None:
        f.write(label)
    f.write("\n")
def is_zero(value):
    """Validate that ``value`` equals zero.

    Returns:
        ``None`` when the value is zero, otherwise a message describing
        the mismatch (worded like the one from ``is_dex_magic``).
    """
    if value == 0:
        return None
    # The message previously read "bit is", a typo for "but is".
    return "value should be zero, but is %s" % (str(value))
def is_dex_magic(magic):
    """Validate ``magic`` against ``MAGIC``.

    Returns ``None`` on a match, otherwise a message naming the expected
    and the actual value.
    """
    if magic != MAGIC:
        return "value should be %s but is %s" % (MAGIC, magic)
    return None
def hex_escape(s):
    """Return ``s`` with every character passed through ``escape``."""
    escaped_chars = [escape(ch) for ch in s]
    return "".join(escaped_chars)
# ----------------------------------------------------------------------
# encoded_field
# ----------------------------------------------------------------------
class encoded_field(AutoParser):
    """A field entry of a ``class_data_item``: field index and access flags.

    Both values are parsed as ULEB128. ``fixup_indexes`` turns the parsed
    ``field_idx`` values of a list into running totals.
    """

    items = [
        {"type": "uleb", "name": "field_idx", "format": "%u"},
        {"type": "uleb", "name": "access_flags", "format": "0x%8.8x"},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    @classmethod
    def fixup_indexes(cls, items):
        """Add each entry's predecessor ``field_idx`` to its own, in place."""
        previous = None
        for entry in items:
            if previous is not None:
                entry.field_idx += previous.field_idx
            previous = entry

    @classmethod
    def get_table_header(cls):
        """Return the column header line used when dumping a list of fields."""
        return "FIELD FLAGS\n"

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True

    def get_access_flags(self):
        """Return ``access_flags`` wrapped in ``AccessFlags``."""
        return AccessFlags(self.access_flags)
# ----------------------------------------------------------------------
# encoded_method
# ----------------------------------------------------------------------
class encoded_method(AutoParser):
    """A method entry of a ``class_data_item``: index, flags and code offset.

    All three values are parsed as ULEB128. ``fixup_indexes`` turns the
    parsed ``method_idx`` values of a list into running totals.
    """

    items = [
        {"type": "uleb", "name": "method_idx", "format": "%u"},
        {"type": "uleb", "name": "access_flags", "format": "0x%8.8x"},
        {"type": "uleb", "name": "code_off", "format": "0x%8.8x"},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    @classmethod
    def fixup_indexes(cls, items):
        """Add each entry's predecessor ``method_idx`` to its own, in place."""
        previous = None
        for entry in items:
            if previous is not None:
                entry.method_idx += previous.method_idx
            previous = entry

    @classmethod
    def get_table_header(cls):
        """Return the column header line used when dumping a list of methods."""
        return "METHOD FLAGS\n"

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True

    def get_access_flags(self):
        """Return ``access_flags`` wrapped in ``AccessFlags``."""
        return AccessFlags(self.access_flags)
# ----------------------------------------------------------------------
# class_data_item
# ----------------------------------------------------------------------
class class_data_item(AutoParser):
    """Field and method lists of a class.

    Four ULEB128 counts are followed by the static fields, instance
    fields, direct methods and virtual methods. After parsing, the index
    of every entry in each list is converted to a running total.
    """

    items = [
        {"type": "uleb", "name": "static_fields_size"},
        {"type": "uleb", "name": "instance_fields_size"},
        {"type": "uleb", "name": "direct_methods_size"},
        {"type": "uleb", "name": "virtual_methods_size"},
        {
            "class": encoded_field,
            "name": "static_fields",
            "attr_count": "static_fields_size",
            "flat": True,
        },
        {
            "class": encoded_field,
            "name": "instance_fields",
            "attr_count": "instance_fields_size",
            "flat": True,
        },
        {
            "class": encoded_method,
            "name": "direct_methods",
            "attr_count": "direct_methods_size",
            "flat": True,
        },
        {
            "class": encoded_method,
            "name": "virtual_methods",
            "attr_count": "virtual_methods_size",
            "flat": True,
        },
    ]

    def __init__(self, data):
        super().__init__(self.items, data)
        # Each list is fixed up independently of the others.
        for field_list in (self.static_fields, self.instance_fields):
            encoded_field.fixup_indexes(field_list)
        for method_list in (self.direct_methods, self.virtual_methods):
            encoded_method.fixup_indexes(method_list)

    @classmethod
    def create_empty(cls):
        """Build an instance from four zero bytes, i.e. with all counts 0."""
        zero_counts = file_extract.FileExtract(BytesIO(b"\0\0\0\0"), "=")
        return class_data_item(zero_counts)
# ----------------------------------------------------------------------
# class_def_item
# ----------------------------------------------------------------------
class class_def_item(AutoParser):
    """A class definition record: eight 32-bit fields plus its class data.

    ``class_data`` is parsed from ``class_data_off`` when that offset is
    non-zero; otherwise the shared empty default is used.
    """

    items = [
        {"type": "u32", "name": "class_idx", "align": 4},
        {"type": "u32", "name": "access_flags"},
        {"type": "u32", "name": "superclass_idx"},
        {"type": "u32", "name": "interfaces_off"},
        {"type": "u32", "name": "source_file_idx"},
        {"type": "u32", "name": "annotations_off"},
        {"type": "u32", "name": "class_data_off"},
        {"type": "u32", "name": "static_values_off"},
        {
            "class": class_data_item,
            "name": "class_data",
            "attr_offset": "class_data_off",
            "condition": lambda item, data: item.class_data_off != 0,
            "dump": False,
            "default": class_data_item.create_empty(),
        },
    ]

    def __init__(self, data, context):
        super().__init__(self.items, data, context)
        self.data = data
        # Filled in on the first call to get_interface_ids().
        self.interface_ids = None

    @classmethod
    def get_table_header(cls):
        """Return the column header line used when dumping class defs."""
        return (
            "CLASS ACCESS SUPERCLASS INTERFACES SOURCE"
            " ANNOTATION CLASS_DATA STATIC_VALUES\n"
        )

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True

    def find_encoded_method_by_code_off(self, code_off):
        """Return the first method whose ``code_off`` equals ``code_off``.

        Direct methods are searched before virtual methods; ``None`` is
        returned when nothing matches.
        """
        class_data = self.class_data
        for method_list in (class_data.direct_methods, class_data.virtual_methods):
            for candidate in method_list:
                if candidate.code_off == code_off:
                    return candidate
        return None

    def get_interface_ids(self):
        """Return the interface type indexes, reading them on first use.

        The list is read from ``interfaces_off`` when that offset is
        positive and is empty otherwise; the result is cached.
        """
        if self.interface_ids is None:
            if self.interfaces_off > 0:
                self.data.push_offset_and_seek(self.interfaces_off)
                self.interface_ids = type_list(self.data).list
                self.data.pop_offset_and_seek()
            else:
                self.interface_ids = []
        return self.interface_ids

    def get_access_flags(self):
        """Return ``access_flags`` wrapped in ``AccessFlags``."""
        return AccessFlags(self.access_flags)
# ----------------------------------------------------------------------
# try_item
# ----------------------------------------------------------------------
class try_item(AutoParser):
    """A try range: 32-bit start address, 16-bit count, 16-bit handler offset."""

    items = [
        {"type": "u32", "name": "start_addr"},
        {"type": "u16", "name": "insn_count"},
        {"type": "u16", "name": "handler_off"},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True
# ----------------------------------------------------------------------
# encoded_type_addr_pair
# ----------------------------------------------------------------------
class encoded_type_addr_pair(AutoParser):
    """A (type index, address) pair, both parsed as ULEB128."""

    items = [
        {"type": "uleb", "name": "type_idx", "format": "%#8.8x"},
        {"type": "uleb", "name": "addr", "format": "%#8.8x"},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True
# ----------------------------------------------------------------------
# encoded_catch_handler
# ----------------------------------------------------------------------
class encoded_catch_handler(AutoParser):
    """A catch handler: typed handlers plus an optional catch-all address.

    ``size`` is a signed LEB128; its absolute value is the number of typed
    handlers. ``catch_all_addr`` is parsed only when ``size`` is zero or
    negative and defaults to 0 otherwise.
    """

    items = [
        {"type": "sleb", "name": "size"},
        {
            "class": encoded_type_addr_pair,
            "name": "handlers",
            "attr_count": "size",
            "attr_count_fixup": abs,
        },
        {
            "type": "uleb",
            "name": "catch_all_addr",
            "default": 0,
            "condition": lambda item, data: item.size <= 0,
        },
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True
# ----------------------------------------------------------------------
# encoded_catch_handler_list
# ----------------------------------------------------------------------
class encoded_catch_handler_list(AutoParser):
    """A ULEB128 count followed by that many ``encoded_catch_handler`` items."""

    items = [
        {"type": "uleb", "name": "size"},
        {"class": encoded_catch_handler, "name": "list", "attr_count": "size"},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True
def print_instructions(insns, prefix, flat, f, context=None):
    """Decode the code units in ``insns`` and dump one instruction at a time.

    A newline is written to ``f`` first. Before each instruction the
    optional ``prefix`` and a separator are written to ``f``. ``flat`` is
    accepted but not used.

    NOTE(review): the instruction dump is not given ``f``, so it writes to
    whatever default stream ``DexInstruction.dump`` uses - confirm that is
    intended.
    """
    f.write("\n")
    units = CodeUnits(insns)
    instruction = DexInstruction()
    while units.index_is_valid():
        instruction.decode(units)
        if prefix:
            f.write(prefix)
        f.write(" ")
        instruction.dump(context=context)
class DBG(enum.IntEnum):
    """Named opcodes of the debug-info state machine (0x00-0x09).

    Byte values 0x0A-0xFF are "special" opcodes; they are not members of
    this enum and are recognized with ``is_special_opcode``.
    """

    END_SEQUENCE = 0x00
    ADVANCE_PC = 0x01
    ADVANCE_LINE = 0x02
    START_LOCAL = 0x03
    START_LOCAL_EXTENDED = 0x04
    END_LOCAL = 0x05
    RESTART_LOCAL = 0x06
    SET_PROLOGUE_END = 0x07
    SET_EPILOGUE_BEGIN = 0x08
    SET_FILE = 0x09

    @staticmethod
    def is_special_opcode(value):
        """Return True when ``value`` lies in the special-opcode range."""
        return 0x0A <= value <= 0xFF

    @classmethod
    def _missing_(cls, value):
        """Resolve a lookup value that is not a known member.

        A ``file_extract.FileExtract`` is consumed as one unsigned byte
        which is then looked up. Any other value is deferred to the base
        class, so an unknown opcode is reported as ``ValueError``.
        """
        # A plain integer only gets here after the normal member lookup has
        # already failed. Looking it up again would re-enter this method
        # without end, so hand it straight to the base class.
        if isinstance(value, int):
            return super()._missing_(value)
        if isinstance(value, file_extract.FileExtract):
            return cls(value.get_uint8())
        return super()._missing_(value)

    def dump(self, prefix=None, f=sys.stdout, print_name=True, parent_path=None):
        """Write this member's name to ``f``; the other arguments are unused."""
        f.write(self.name)
def decode_DBG_or_val(value):
    """Decode a debug-info opcode.

    ``value`` is either a number or a ``file_extract.FileExtract`` from
    which one unsigned byte is read. Special opcodes are returned as the
    raw number; everything else is converted to a ``DBG`` member.
    """
    if isinstance(value, file_extract.FileExtract):
        opcode = value.get_uint8()
    else:
        opcode = value
    if DBG.is_special_opcode(opcode):
        return opcode
    return DBG(opcode)
class debug_info_op(AutoParser):
    """One opcode of the debug-info byte stream together with its operands.

    ``op`` is a ``DBG`` member for the named opcodes and a plain integer
    for special opcodes (0x0A-0xFF). The operands parsed depend on the
    opcode; for special opcodes ``line_offset`` and ``addr_offset`` are
    computed from the opcode value itself.
    """

    items = [
        {"decode": decode_DBG_or_val, "name": "op"},
        {
            "switch": "op",
            "cases": {
                DBG.ADVANCE_PC: [{"type": "uleb", "name": "addr_offset"}],
                DBG.ADVANCE_LINE: [{"type": "sleb", "name": "line_offset"}],
                DBG.START_LOCAL: [
                    {"type": "uleb", "name": "register_num"},
                    {"type": "ulebp1", "name": "name_idx"},
                    {"type": "ulebp1", "name": "type_idx"},
                ],
                DBG.START_LOCAL_EXTENDED: [
                    {"type": "uleb", "name": "register_num"},
                    {"type": "ulebp1", "name": "name_idx"},
                    {"type": "ulebp1", "name": "type_idx"},
                    {"type": "ulebp1", "name": "sig_idx"},
                ],
                DBG.END_LOCAL: [{"type": "uleb", "name": "register_num"}],
                DBG.RESTART_LOCAL: [{"type": "uleb", "name": "register_num"}],
                DBG.SET_FILE: [{"type": "ulebp1", "name": "name_idx"}],
                "default": [],
            },
        },
    ]

    def __init__(self, data):
        AutoParser.__init__(self, self.items, data)
        if DBG.is_special_opcode(self.op):
            # A special opcode packs a line delta and an address delta into
            # one byte: after removing the 0x0A bias, the remainder modulo
            # line_range (shifted by line_base) is the line delta and the
            # quotient is the address delta.
            line_base = -4
            line_range = 15
            adjusted_opcode = self.op - 0x0A
            quotient, remainder = divmod(adjusted_opcode, line_range)
            self.line_offset = line_base + remainder
            self.addr_offset = quotient
        # Number of bytes consumed by this opcode including its operands.
        self.byte_size = data.tell() - self.get_offset()

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True

    def get_byte_size(self):
        """Return the encoded size of this opcode and its operands in bytes."""
        return self.byte_size

    def dump_opcode(self, f=sys.stdout):
        """Write the opcode name followed by its operands to ``f``.

        Index operands that are negative (the ULEB128p1 encoding of "no
        index") are printed as ``NO_INDEX``.
        """

        def index_text(index):
            # Render an index operand, spelling out the "no index" case.
            return "NO_INDEX" if index < 0 else "%u" % (index)

        op = self.op
        if isinstance(op, DBG):
            f.write(op.name)
        else:
            assert DBG.is_special_opcode(op)
            f.write(f"SPECIAL_OPCODE({op})")

        if op == DBG.ADVANCE_PC:
            f.write("(%u)" % self.addr_offset)
        elif op == DBG.ADVANCE_LINE:
            f.write("(%u)" % self.line_offset)
        elif op == DBG.START_LOCAL:
            f.write("(register_num=%u, name_idx=" % self.register_num)
            f.write(index_text(self.name_idx))
            f.write(", type_idx=")
            f.write(index_text(self.type_idx))
            f.write(")")
        elif op == DBG.START_LOCAL_EXTENDED:
            f.write("(register_num=%u, name_idx=" % self.register_num)
            f.write(index_text(self.name_idx))
            f.write(", type_idx=")
            f.write(index_text(self.type_idx))
            f.write(", sig_idx=")
            # Print the signature index here; this slot used to repeat
            # type_idx.
            f.write(index_text(self.sig_idx))
            f.write(")")
        elif op == DBG.END_LOCAL or op == DBG.RESTART_LOCAL:
            f.write("(register_num=%u)" % self.register_num)
        elif op == DBG.SET_FILE:
            f.write("(name_idx=%s)" % index_text(self.name_idx))
        elif DBG.is_special_opcode(op):
            f.write(
                " (addr_offset=%u, line_offset=%i)"
                % (self.addr_offset, self.line_offset)
            )
class debug_info_item(AutoParser):
    """Header of a debug-info item with lazy access to its opcode stream.

    The parsed header holds ``line_start``, ``parameters_size`` and the
    ``parameter_names`` indexes. The opcodes that follow are decoded on
    demand by ``get_ops`` and turned into rows by ``get_line_table``.
    """

    items = [
        {"type": "uleb", "name": "line_start"},
        {"type": "uleb", "name": "parameters_size"},
        {"type": "ulebp1", "name": "parameter_names", "attr_count": "parameters_size"},
    ]

    class row(object):
        """One line-table row: address, line, source file and two flags."""

        def __init__(self):
            self.address = 0
            self.line = 0
            self.source_file = -1
            self.prologue_end = False
            self.epilogue_begin = False

        def dump(self, f=sys.stdout):
            """Write the row to ``f`` as one line of text."""
            f.write("0x%4.4x %5u %5u " % (self.address, self.line, self.source_file))
            if self.prologue_end or self.epilogue_begin:
                f.write("P " if self.prologue_end else " ")
                if self.epilogue_begin:
                    f.write("E")
            f.write("\n")

    def __init__(self, data):
        AutoParser.__init__(self, self.items, data)
        self.data = data
        # Both are filled in lazily.
        self.ops = None
        self.line_table = None
        # Position right after the header, i.e. where the opcodes start.
        self.debug_info_offset = data.tell()

    def check_encoding(self, dex_method, f=sys.stdout):
        """Report debug info that could be dropped and return the byte savings.

        When the opcode stream consists of a lone END_SEQUENCE the whole
        item is reported as removable. Otherwise each prologue/epilogue
        marker and each local-variable opcode is reported individually.

        Args:
            dex_method: Object whose ``get_qualified_name()`` names the
                method in the warnings.
            f: Stream the warnings are written to.

        Returns:
            Total number of bytes that could be saved.
        """
        bytes_saved = 0
        ops = self.get_ops()
        if len(ops) == 1 and ops[0].op == DBG.END_SEQUENCE:
            op = ops[0]
            # Both header counts are plain ULEB128 values; only the
            # parameter name indexes use the "plus one" encoding.
            bytes_saved += get_uleb128_byte_size(self.line_start)
            bytes_saved += get_uleb128_byte_size(self.parameters_size)
            for parameter_name in self.parameter_names:
                bytes_saved += get_uleb128p1_byte_size(parameter_name)
            # One more byte for the END_SEQUENCE opcode itself.
            bytes_saved += 1
            f.write(
                "warning: %s debug info contains only a single "
                % (dex_method.get_qualified_name())
            )
            f.write("%s, all debug info can be removed " % (op.op))
            f.write("(%u bytes)\n" % (bytes_saved))
            return bytes_saved

        # Dex files built for release don't need any of the following
        # debug info ops. Each maps to the unit word used in its warning.
        removable_ops = {
            DBG.SET_PROLOGUE_END: "byte",
            DBG.SET_EPILOGUE_BEGIN: "byte",
            DBG.START_LOCAL: "bytes",
            DBG.START_LOCAL_EXTENDED: "bytes",
            DBG.END_LOCAL: "bytes",
            DBG.RESTART_LOCAL: "bytes",
        }
        for op in ops:
            unit = removable_ops.get(op.op)
            if unit is None:
                continue
            size = op.get_byte_size()
            f.write(
                "warning: %s %s can be removed (%u %s)\n"
                % (dex_method.get_qualified_name(), op.op, size, unit)
            )
            bytes_saved += size
        return bytes_saved

    def get_line_table(self):
        """Return the list of rows produced by running the opcodes (cached).

        A row is emitted for every special opcode. Decoding stops at
        END_SEQUENCE.

        NOTE(review): the line register starts at 0 here rather than at
        ``line_start`` - confirm whether callers add ``line_start``
        themselves.
        """
        if self.line_table is None:
            line_table = []
            row = debug_info_item.row()
            for op_args in self.get_ops():
                op = op_args.op
                if op == DBG.END_SEQUENCE:
                    break
                if op == DBG.ADVANCE_PC:
                    row.address += op_args.addr_offset
                elif op == DBG.ADVANCE_LINE:
                    row.line += op_args.line_offset
                elif op in (
                    DBG.START_LOCAL,
                    DBG.START_LOCAL_EXTENDED,
                    DBG.END_LOCAL,
                    DBG.RESTART_LOCAL,
                ):
                    # Local-variable bookkeeping does not affect rows.
                    pass
                elif op == DBG.SET_PROLOGUE_END:
                    row.prologue_end = True
                elif op == DBG.SET_EPILOGUE_BEGIN:
                    row.epilogue_begin = True
                elif op == DBG.SET_FILE:
                    row.source_file = op_args.name_idx
                else:
                    # Special opcode: advance both registers, emit a row
                    # and clear the one-shot flags.
                    row.line += op_args.line_offset
                    row.address += op_args.addr_offset
                    line_table.append(copy.copy(row))
                    row.prologue_end = False
                    row.epilogue_begin = False
            self.line_table = line_table
        return self.line_table

    def get_ops(self, reset_offset=True):
        """Return the decoded opcodes up to and including END_SEQUENCE (cached).

        Args:
            reset_offset: When true, the reader position is saved before
                decoding and restored afterwards; when false the reader is
                simply moved to the start of the opcodes and left after
                the last one.
        """
        if self.ops is None:
            data = self.data
            if reset_offset:
                data.push_offset_and_seek(self.debug_info_offset)
            else:
                data.seek(self.debug_info_offset)
            # Decode into a local list so a failure part-way through does
            # not leave a truncated list cached on the instance.
            ops = []
            try:
                while True:
                    op = debug_info_op(data)
                    ops.append(op)
                    if op.op == DBG.END_SEQUENCE:
                        break
            finally:
                # Always undo the push, also when decoding raised.
                if reset_offset:
                    data.pop_offset_and_seek()
            self.ops = ops
        return self.ops

    def dump_debug_info(self, f=sys.stdout, prefix=None, reset_offset=True):
        """Write the header summary and one line per opcode to ``f``.

        Each line is preceded by the optional ``prefix`` and a separator.
        """
        ops = self.get_ops(reset_offset=reset_offset)
        if prefix:
            f.write(prefix)
        f.write(" ")
        f.write(
            "line_start={}({}) param_size={}({}) param_name=[{}]\n".format(
                self.line_start,
                get_uleb128_byte_size(self.line_start),
                self.parameters_size,
                get_uleb128_byte_size(self.parameters_size),
                ", ".join(map(str, self.parameter_names)),
            )
        )
        for op in ops:
            if prefix:
                f.write(prefix)
            f.write(" ")
            op.dump_opcode(f=f)
            f.write("\n")
# ----------------------------------------------------------------------
# code_item
# ----------------------------------------------------------------------
class code_item(AutoParser):
    """The code of one method: header, instructions, tries and handlers.

    A 16-bit padding value is consumed after the instructions when tries
    are present and the instruction count is odd. ``tries`` and
    ``handlers`` are parsed only when ``tries_size`` is non-zero and are
    ``None`` otherwise.
    """

    items = [
        {"type": "u16", "name": "registers_size", "align": 4},
        {"type": "u16", "name": "ins_size"},
        {"type": "u16", "name": "outs_size"},
        {"type": "u16", "name": "tries_size"},
        {"type": "u32", "name": "debug_info_off"},
        {"type": "u32", "name": "insns_size", "format": "%u"},
        {
            "type": "u16",
            "name": "insns",
            "attr_count": "insns_size",
            "dump_list": print_instructions,
        },
        {
            "type": "u16",
            "condition": lambda item, data: item.tries_size != 0
            and item.insns_size & 1,
        },
        {
            "class": try_item,
            "name": "tries",
            "attr_count": "tries_size",
            "condition": lambda item, data: item.tries_size != 0,
            "default": None,
        },
        {
            "class": encoded_catch_handler_list,
            "name": "handlers",
            "condition": lambda item, data: item.tries_size != 0,
            "default": None,
        },
    ]

    def __init__(self, data, context):
        super().__init__(self.items, data, context)
        self.debug_info = None
        self.data = data
        # A tuple cannot be mutated by accident and is hashable, unlike the
        # list produced by the parser.
        self.insns = tuple(self.insns)

    def get_debug_info(self):
        """Return the parsed debug info, reading it on first use.

        ``None`` is returned when ``debug_info_off`` is not positive.
        """
        needs_parse = self.debug_info is None and self.debug_info_off > 0
        if needs_parse:
            reader = self.data
            reader.push_offset_and_seek(self.debug_info_off)
            self.debug_info = debug_info_item(reader)
            reader.pop_offset_and_seek()
        return self.debug_info

    def dump(self, f, prefix, verbose=False):
        """Dump all fields when ``verbose``, otherwise only the instructions."""
        if not verbose:
            print_instructions(self.insns, prefix, False, f, self.context)
            return
        AutoParser.dump(self, f=f, prefix=prefix)
class encoded_value:
    """A single encoded value read from a DEX file.

    The first byte holds the value type in its low five bits and a
    ``value_arg`` in its high three bits. For the integer and index types
    ``value_arg + 1`` is the number of payload bytes that follow.

    Attributes set by the constructor:
        value_type: The ``ValueFormat`` of the value.
        value: The decoded value, or ``None`` when nothing was decoded.

    Raises:
        ValueError: For float, double and array values (not supported),
            and when ``value_arg`` is non-zero for a type that requires it
            to be zero.
    """

    def __init__(self, data):
        arg_type = data.get_uint8()
        value_arg = arg_type >> 5
        self.value_type = ValueFormat(arg_type & 0x1F)
        self.value = None
        size = value_arg + 1

        value_type = self.value_type
        signed_types = (ValueFormat.SHORT, ValueFormat.INT, ValueFormat.LONG)
        unsigned_types = (
            ValueFormat.CHAR,
            ValueFormat.METHOD_TYPE,
            ValueFormat.METHOD_HANDLE,
            ValueFormat.STRING,
            ValueFormat.TYPE,
            ValueFormat.FIELD,
            ValueFormat.METHOD,
            ValueFormat.ENUM,
        )

        if value_type == ValueFormat.BYTE:
            if value_arg != 0:
                raise ValueError("VALUE_BYTE value_arg != 0 (%u)" % (value_arg))
            self.value = data.get_sint8()
        elif value_type in signed_types:
            self.value = data.get_sint_size(size)
        elif value_type in unsigned_types:
            self.value = data.get_uint_size(size)
        elif value_type == ValueFormat.FLOAT:
            raise ValueError("VALUE_FLOAT not supported yet")
        elif value_type == ValueFormat.DOUBLE:
            raise ValueError("VALUE_DOUBLE not supported yet")
        elif value_type == ValueFormat.ARRAY:
            if value_arg != 0:
                raise ValueError("VALUE_ARRAY value_arg != 0 (%u)" % (value_arg))
            # encoded_array: an array of values, in the format specified by
            # "encoded_array format". The size of the value is implicit in
            # the encoding.
            raise ValueError("VALUE_ARRAY not supported yet")
        elif value_type == ValueFormat.ANNOTATION:
            if value_arg != 0:
                raise ValueError("VALUE_ANNOTATION value_arg != 0 (%u)" % (value_arg))
            # encoded_annotation: a sub-annotation whose size is implicit
            # in the encoding.
            # NOTE(review): the payload is not consumed here, so the reader
            # is left at the start of the annotation - confirm callers
            # cope with that.
        elif value_type == ValueFormat.NULL:
            if value_arg != 0:
                raise ValueError("VALUE_NULL value_arg != 0 (%u)" % (value_arg))
            self.value = 0
        elif value_type == ValueFormat.BOOLEAN:
            # The boolean is carried in value_arg itself and no payload
            # byte follows, so nothing more must be read from the stream.
            self.value = value_arg != 0
# ----------------------------------------------------------------------
# encoded_array
# ----------------------------------------------------------------------
class encoded_array(AutoParser):
    """A ULEB128 count followed by that many ``encoded_value`` entries."""

    items = [
        {"type": "uleb", "name": "size"},
        {"class": encoded_value, "name": "values", "attr_count": "size"},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)
class encoded_array_item(AutoParser):
    """An item consisting of a single ``encoded_array`` named ``value``."""

    items = [{"class": encoded_array, "name": "value"}]

    def __init__(self, data):
        super().__init__(self.items, data)
# ----------------------------------------------------------------------
# field_id_item
# ----------------------------------------------------------------------
class field_id_item(AutoParser):
    """A field identifier: 16-bit class and type indexes, 32-bit name index."""

    items = [
        {"type": "u16", "name": "class_idx", "align": 4},
        {"type": "u16", "name": "type_idx"},
        {"type": "u32", "name": "name_idx"},
    ]

    def __init__(self, data, context):
        super().__init__(self.items, data, context)

    @classmethod
    def get_table_header(cls):
        """Return the column header line used when dumping field ids."""
        return "CLASS TYPE NAME\n"

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True
# ----------------------------------------------------------------------
# header_item
# ----------------------------------------------------------------------
class header_item(AutoParser):
    """The DEX file header.

    The magic is validated with ``is_dex_magic`` and the byte following
    the three version bytes must be zero. All remaining fields are
    unsigned 32-bit values, except the 20-byte signature.
    """

    items = [
        {"type": "cstr[4]", "name": "magic", "validate": is_dex_magic},
        {"type": "u8[3]", "name": "version", "dump": print_version},
        {"type": "u8", "validate": is_zero},  # NULL byte
        {"type": "u32", "name": "checksum"},
        {"type": "u8[20]", "name": "signature", "dump": print_hex_bytes},
        {"type": "u32", "name": "file_size"},
        {"type": "u32", "name": "header_size"},
        {"type": "u32", "name": "endian_tag", "dump": print_endian},
        {"type": "u32", "name": "link_size"},
        {"type": "u32", "name": "link_off"},
        {"type": "u32", "name": "map_off"},
        {"type": "u32", "name": "string_ids_size"},
        {"type": "u32", "name": "string_ids_off"},
        {"type": "u32", "name": "type_ids_size"},
        {"type": "u32", "name": "type_ids_off"},
        {"type": "u32", "name": "proto_ids_size"},
        {"type": "u32", "name": "proto_ids_off"},
        {"type": "u32", "name": "field_ids_size"},
        {"type": "u32", "name": "field_ids_off"},
        {"type": "u32", "name": "method_ids_size"},
        {"type": "u32", "name": "method_ids_off"},
        {"type": "u32", "name": "class_defs_size"},
        {"type": "u32", "name": "class_defs_off"},
        {"type": "u32", "name": "data_size"},
        {"type": "u32", "name": "data_off"},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    def get_dump_header(self):
        """Return the title line printed above the dumped header."""
        return "DEX header:"
# ----------------------------------------------------------------------
# map_item
# ----------------------------------------------------------------------
class map_item(AutoParser):
    """One map list entry: type code, an unnamed 16-bit value, size, offset."""

    items = [
        {"class": TypeCode, "name": "type", "dump_width": TypeCode.max_width()},
        {"type": "u16"},
        {"type": "u32", "name": "size"},
        {"type": "u32", "name": "offset"},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    def get_list_header_lines(self):
        """Return the header lines printed above a list of map items."""
        header = " TYPE SIZE OFFSET\n"
        return [header]

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True
# ----------------------------------------------------------------------
# map_list
# ----------------------------------------------------------------------
class map_list(AutoParser):
    """A 4-byte aligned 32-bit count followed by that many ``map_item`` entries."""

    items = [
        {"type": "u32", "name": "size", "align": 4, "dump": False},
        {"class": map_item, "name": "list", "attr_count": "size", "flat": True},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    def get_dump_header(self):
        """Return the title line printed above the dumped list."""
        return "map_list:"
# ----------------------------------------------------------------------
# method_handle_item
# ----------------------------------------------------------------------
class method_handle_item(AutoParser):
    """A method handle: its type code and a field or method id.

    Each of the two named values is followed by an unnamed 16-bit value.
    """

    items = [
        {"class": MethodHandleTypeCode, "name": "method_handle_type", "align": 4},
        {"type": "u16"},
        {"type": "u16", "name": "field_or_method_id"},
        {"type": "u16"},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)
# ----------------------------------------------------------------------
# method_id_item
# ----------------------------------------------------------------------
class method_id_item(AutoParser):
    """A method identifier: 16-bit class and proto indexes, 32-bit name index."""

    items = [
        {"type": "u16", "name": "class_idx", "align": 4},
        {"type": "u16", "name": "proto_idx"},
        {"type": "u32", "name": "name_idx"},
    ]

    def __init__(self, data, context):
        super().__init__(self.items, data, context)

    @classmethod
    def get_table_header(cls):
        """Return the column header line used when dumping method ids."""
        return "CLASS PROTO NAME\n"

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True
# ----------------------------------------------------------------------
# proto_id_item
# ----------------------------------------------------------------------
class proto_id_item(AutoParser):
    """A method prototype: shorty index, return type index, parameters offset."""

    items = [
        {"type": "u32", "name": "shorty_idx", "align": 4},
        {"type": "u32", "name": "return_type_idx"},
        {"type": "u32", "name": "parameters_off"},
    ]

    def __init__(self, data, context):
        super().__init__(self.items, data, context)
        # Parsed on the first call to get_parameters().
        self.parameters = None

    @classmethod
    def get_table_header(cls):
        """Return the column header line used when dumping proto ids."""
        return "SHORTY_IDX RETURN PARAMETERS\n"

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True

    def get_parameters(self):
        """Return the parameter ``type_list``, reading it on first use.

        ``None`` is returned when ``parameters_off`` is zero.
        """
        already_loaded = self.parameters is not None
        if self.parameters_off != 0 and not already_loaded:
            # The reader comes from the context object.
            reader = self.context.data
            reader.push_offset_and_seek(self.parameters_off)
            self.parameters = type_list(reader)
            reader.pop_offset_and_seek()
        return self.parameters
# ----------------------------------------------------------------------
# string_data_item
# ----------------------------------------------------------------------
class string_data_item(AutoParser):
    """A string: ULEB128 ``utf16_size`` followed by the C-string ``data``."""

    items = [
        {"type": "uleb", "name": "utf16_size", "format": "%3u"},
        {"type": "cstr", "name": "data", "dump": print_string},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    def get_dump_flat(self):
        """Always report that this item is dumped flat."""
        return True
# ----------------------------------------------------------------------
# type_list
# ----------------------------------------------------------------------
class type_list(AutoParser):
    """A 4-byte aligned 32-bit count followed by that many 16-bit entries."""

    items = [
        {"type": "u32", "name": "size", "align": 4},
        {"type": "u16", "name": "list", "attr_count": "size"},
    ]

    def __init__(self, data):
        super().__init__(self.items, data)

    def get_dump_header(self):
        """Return the title line printed above the dumped list."""
        return "type_list:"
class Progard:
    """Parses a proguard map file and does name lookups.

    ``classes_dict`` maps each new (obfuscated) class name to a tuple of
    the old class name and a dictionary from new member names to old
    member descriptions.
    """

    def __init__(self, path):
        """Read the mapping file at ``path``.

        Lines that start with whitespace describe members of the most
        recent class line. Blank lines, ``#`` comment lines and lines
        without a " -> " separator are skipped, as are member lines that
        appear before any class line.
        """
        self.path = path
        self.classes_dict = {}
        class_dict = None
        regex = re.compile(r"\s+([0-9]+:[0-9]+:)?(.*) -> (.*)$")
        with open(path, "r") as f:
            for line in f:
                line = line.rstrip("\n")
                if not line:
                    continue
                if line[0].isspace():
                    match = regex.match(line)
                    # A member line is only meaningful inside a class.
                    if match and class_dict is not None:
                        class_dict[match.group(3)] = match.group(2)
                    continue
                if line.startswith("#"):
                    # Comment or metadata line; it does not end the
                    # member list of the current class.
                    continue
                old, separator, new = line.partition(" -> ")
                if not separator:
                    continue
                class_dict = {}
                self.classes_dict[new.rstrip(":")] = (old, class_dict)

    def lookup_class(self, new_class):
        """Translate a new class name to the old class name.

        Returns ``None`` when the class is unknown.
        """
        entry = self.classes_dict.get(new_class)
        if entry is None:
            return None
        return entry[0]

    def lookup_method(self, new_class, new_method):
        """Translate a new class name and a new method into the old class
        name and the old method name.

        Returns ``None`` when the class or the method is unknown.
        """
        entry = self.classes_dict.get(new_class)
        if entry is None:
            return None
        return entry[1].get(new_method)
class DexMethod:
"""Encapsulates a method within a DEX file."""
def __init__(self, dex_class, encoded_method, is_virtual):
self.dex_class = dex_class
self.encoded_method = encoded_method
self.method_id = None
self.is_virtual = is_virtual
self.code_item = None
self.insns = None
self.name_in_file = None
self.name = None
def __len__(self):
ci = self.get_code_item()
return (len(self.encoded_method) if self.encoded_method else 0) + (
len(ci) if ci else 0
)
def get_signature(self):
class_name = self.get_class().get_name()
method_name = self.get_name()
proto = self.get_pretty_proto()
return class_name + "." + method_name + ":" + proto
def get_qualified_name(self):
class_name = self.get_class().get_name()
method_name = self.get_name()
if class_name[-1] != ";":
return class_name + ":" + method_name
else:
return class_name + method_name
def get_method_id(self):
"""Get the method_id_item for this method."""
if self.method_id is None:
self.method_id = self.get_dex().get_method_id(self.encoded_method)
return self.method_id
def get_method_index(self):
"""Get the method index into the method_ids array in the DEX file."""
return self.encoded_method.method_idx
def get_code_offset(self):
"""Get the code offset for this method."""
return self.encoded_method.code_off
def get_code_item_index(self):
"""Get the index into the code_items array in the dex file for the
code for this method, or -1 if there is no code for this method."""
code_item = self.get_code_item()
if code_item:
return self.get_dex().get_code_item_index_from_code_off(
code_item.get_offset()
)
return -1
def get_dex(self):
return self.dex_class.get_dex()
def get_name_in_file(self):
"""Returns the name of the method as it is known in the current DEX
file (no proguard remapping)"""
if self.name_in_file is None:
self.name_in_file = self.get_dex().get_string(self.get_method_id().name_idx)
return self.name_in_file
def get_name(self):
if self.name is None:
cls_mangled = self.get_class().get_mangled_name()
name_in_file = self.get_name_in_file()
if cls_mangled and name_in_file:
self.name = self.get_dex().demangle_class_method_name(
cls_mangled, name_in_file
)
if self.name is None:
self.name = name_in_file
return self.name
def get_internal_pretty_method_str_rep(self):
# What it looks like boolean com.facebook.common.jit.common.JitDisabledChecker.testCompileMethod(int)
dex = self.get_dex()
proto_id = dex.get_proto_id(self.get_method_id().proto_idx)
if proto_id is None:
return None
return_type = dex.get_typename(proto_id.return_type_idx)
param = "("
if proto_id.parameters_off != 0:
# type is type_list
param_list = proto_id.get_parameters().list
not_first = False
for type_idx in param_list:
if not_first:
param += ", "
param += dex.get_typename(type_idx)
param += ")"
cur_class_name = self.get_class().get_name()
cur_method_name = self.get_name()
return return_type + " " + cur_class_name + "." + cur_method_name + param
def get_pretty_proto(self):
dex = self.get_dex()
proto_id = dex.get_proto_id(self.get_method_id().proto_idx)
if proto_id is None:
return None
return_type = dex.get_raw_typename(proto_id.return_type_idx)
param = "("
if proto_id.parameters_off != 0:
# type is type_list
param_list = proto_id.get_parameters().list
for type_idx in param_list:
param += dex.get_raw_typename(type_idx)
param += ")"
return param + return_type
def get_class(self):
return self.dex_class
def get_code_item(self):
if self.code_item is None:
if self.encoded_method.code_off != 0:
self.code_item = self.get_dex().find_code_item(
self.encoded_method.code_off
)
return self.code_item
def get_instruction_count(self):
code_item = self.get_code_item()
if code_item:
return len(code_item.insns)
return 0
def get_code_byte_size(self):
code_item = self.get_code_item()
if code_item:
return len(code_item.insns) * 2
return 0
def get_instructions(self):
if self.insns is None:
self.insns = []
code_item = self.get_code_item()
if code_item:
code_units = CodeUnits(code_item.insns)
while code_units.index_is_valid():
insn = DexInstruction()
insn.decode(code_units)
self.insns.append(insn)
return self.insns
def is_abstract(self):
    """Return True when the encoded method's access flags include ABSTRACT."""
    flags = self.encoded_method.get_access_flags()
    return bool(flags & AccessFlags.ABSTRACT)
def is_native(self):
    """Return True when the encoded method's access flags include NATIVE."""
    flags = self.encoded_method.get_access_flags()
    return bool(flags & AccessFlags.NATIVE)
def is_synthetic(self):
    """Return True when the encoded method's access flags include SYNTHETIC."""
    flags = self.encoded_method.get_access_flags()
    return bool(flags & AccessFlags.SYNTHETIC)
def is_public(self):
    """Return True when the raw access_flags value has the PUBLIC bit set."""
    raw_flags = self.encoded_method.access_flags
    return bool(raw_flags & AccessFlags.PUBLIC)
def is_private(self):
    """Return True when the raw access_flags value has the PRIVATE bit set."""
    raw_flags = self.encoded_method.access_flags
    return bool(raw_flags & AccessFlags.PRIVATE)
def is_protected(self):
    """Return True when the raw access_flags value has the PROTECTED bit set."""
    raw_flags = self.encoded_method.access_flags
    return bool(raw_flags & AccessFlags.PROTECTED)
def dump(self, options, f=sys.stdout):
    """Write a textual description of this method to f.

    Always writes a one-line "method: ..." header. With options.verbose the
    encoded_method, method_id and proto_id records are dumped as well. If a
    code item exists for the method's code_off it is dumped, and with
    options.dump_debug_info the debug info is dumped too.

    NOTE(review): the nesting of the blank-line write after the verbose
    records and of the final dump_debug_info call was reconstructed from
    flattened source — confirm against the original layout.
    """
    dex = self.get_dex()
    method_id = dex.get_method_id(self.encoded_method.method_idx)
    # Header: kind, class name, method name, prototype and access flags
    f.write(
        "method: %s %s.%s:%s (%s)\n"
        % (
            "virtual" if self.is_virtual else "direct",
            self.get_class().get_name(),
            self.get_name(),
            dex.get_proto_string(method_id.proto_idx) if method_id else "",
            str(self.encoded_method.get_access_flags()),
        )
    )
    if options.verbose:
        self.encoded_method.dump(f=f, prefix=" encoded_method.", flat=False)
        if method_id:
            method_id.dump(f=f, prefix=" method_id.", flat=False)
            proto_id = dex.get_proto_id(method_id.proto_idx)
            if proto_id:
                proto_id.dump(f=f, prefix=" proto_id.", flat=False)
        f.write("\n")
    # A negative index means no code item is registered at this code offset
    code_item_idx = dex.get_code_item_index_from_code_off(
        self.encoded_method.code_off
    )
    if code_item_idx >= 0:
        code_item = dex.get_code_items()[code_item_idx]
        if options.verbose:
            f.write(
                " code_item[%u] @ %#8.8x:"
                % (code_item_idx, code_item.get_offset())
            )
        code_item.dump(f=f, prefix=" ", verbose=options.verbose)
    if options.dump_debug_info:
        self.dump_debug_info(f=f, prefix=" ")
def dump_code(self, f=sys.stdout):
insns = self.get_instructions()
for insn in insns:
insn.dump(f=f)
def get_debug_info(self):
code_item = self.get_code_item()
if code_item:
return code_item.get_debug_info()
return None
def dump_debug_info(self, f=sys.stdout, prefix=None):
debug_info = self.get_debug_info()
if prefix:
f.write(prefix)
if debug_info:
f.write("debug info @ %#8.8x:\n" % (debug_info.get_offset()))
debug_info.dump_debug_info(f=f, prefix=prefix)
f.write("\n")
else:
f.write("no debug info\n")
def check_debug_info_encoding(self):
debug_info = self.get_debug_info()
if debug_info:
return debug_info.check_encoding(self)
def get_raw_access_flags(self):
return str(self.encoded_method.access_flags)
def get_line_number(self):
debug_info = self.get_debug_info()
if debug_info:
return debug_info.line_start
return 0
class DexClass:
    """Encapsulates a class within a DEX file."""

    def __init__(self, dex, class_def):
        """Wrap class_def, a class definition belonging to the DEX file dex.

        Methods, fields and names are computed lazily and cached.
        """
        self.dex = dex
        self.class_def = class_def
        self.methods = None  # methods sorted by line number (lazy)
        self.unsorted_methods = None  # methods in declaration order (lazy)
        self.fields = None
        self.mangled = None
        self.demangled = None
        self.method_mapping = None

    def __len__(self):
        """Return the summed len() of all methods and fields of the class."""
        return sum(len(m) for m in self.get_methods(sort=False)) + sum(
            len(f) for f in self.get_fields()
        )

    def dump(self, options, f=sys.stdout):
        """Write a description of the class (flags, superclass, interfaces
        and fields) to f; with options.verbose also dump the raw class_def
        and class_data records.

        NOTE(review): the final blank-line write is placed inside the
        verbose branch; the nesting was reconstructed from flattened source.
        """
        dex = self.get_dex()
        class_def_offset = self.class_def.get_offset()
        class_def_idx = dex.get_class_def_index_from_offset(class_def_offset)
        class_data = self.class_def.class_data
        f.write("\nclass: %s // @%#8.8x" % (self.get_name(), class_def_offset))
        f.write(
            "\n\tAccess flags: (%s)\n\tSuperclass: %s\n\tInterfaces: (%s)"
            % (
                str(self.class_def.get_access_flags()),
                dex.get_typename(self.class_def.superclass_idx),
                ", ".join(
                    [
                        self.dex.get_typename(interface)
                        for interface in self.class_def.get_interface_ids()
                    ]
                ),
            )
        )
        field_ids = dex.get_field_ids()

        def field_to_string(field):
            field_item = field_ids[field.field_idx]
            return "(%s) %s:%s" % (
                str(field.get_access_flags()),
                dex.get_string(field_item.name_idx),
                dex.get_typename(field_item.type_idx),
            )

        f.write(
            "\n\tStatic fields:\n\t\t%s"
            % "\n\t\t".join(
                [
                    field_to_string(class_data.static_fields[i])
                    for i in range(class_data.static_fields_size)
                ]
            )
        )
        f.write(
            "\n\tInstance fields:\n\t\t%s\n"
            % "\n\t\t".join(
                [
                    field_to_string(class_data.instance_fields[i])
                    for i in range(class_data.instance_fields_size)
                ]
            )
        )
        if options.verbose:
            f.write(" class_def[%u] @ %#8.8x:\n" % (class_def_idx, class_def_offset))
            self.class_def.dump(f=f, flat=False, prefix=" ")
            f.write(
                " class_data_item @ %#8.8x:\n"
                % (self.class_def.class_data.get_offset())
            )
            self.class_def.class_data.dump(f=f, flat=False, prefix=" ")
            f.write("\n")

    def get_type_index(self):
        """Get type ID index (class_idx) for this class."""
        return self.class_def.class_idx

    def is_abstract(self):
        """Return True when the class access flags include ABSTRACT."""
        return bool(self.class_def.get_access_flags() & AccessFlags.ABSTRACT)

    def is_public(self):
        """Return True when the raw access_flags has the PUBLIC bit set."""
        return bool(self.class_def.access_flags & AccessFlags.PUBLIC)

    def is_private(self):
        """Return True when the raw access_flags has the PRIVATE bit set."""
        return bool(self.class_def.access_flags & AccessFlags.PRIVATE)

    def is_protected(self):
        """Return True when the raw access_flags has the PROTECTED bit set."""
        return bool(self.class_def.access_flags & AccessFlags.PROTECTED)

    def get_mangled_name(self):
        """Return the type name for class_idx as reported by the DEX file
        (cached)."""
        if self.mangled is None:
            dex = self.get_dex()
            self.mangled = dex.get_typename(self.class_def.class_idx)
        return self.mangled

    def get_name(self):
        """Get the demangled name for a class if we have a proguard file or
        return the mangled name if we don't have a proguard file."""
        if self.demangled is None:
            mangled = self.get_mangled_name()
            if mangled:
                self.demangled = self.get_dex().demangle_class_name(mangled)
            if self.demangled is None:
                self.demangled = mangled
        return self.demangled

    def get_dex(self):
        """Return the DEX file this class belongs to."""
        return self.dex

    def _create_methods(self):
        """Build DexMethod wrappers for all direct, then all virtual methods."""
        class_data = self.class_def.class_data
        return [DexMethod(self, m, False) for m in class_data.direct_methods] + [
            DexMethod(self, m, True) for m in class_data.virtual_methods
        ]

    def get_methods(self, sort=True):
        """Return the DexMethod objects of this class.

        With sort=True (the default) the list is sorted by line number and
        cached. With sort=False the methods are returned in declaration order
        (direct methods first, then virtual methods), unless a sorted list
        was already built, in which case that list is returned.

        The unsorted list contains virtual methods too; it previously held
        only the direct methods, which also truncated any sorted list that
        was later derived from it.
        """
        if self.methods is not None:
            return self.methods
        if not sort:
            if self.unsorted_methods is None:
                self.unsorted_methods = self._create_methods()
            return self.unsorted_methods
        unsorted = self.unsorted_methods
        if unsorted is None:
            unsorted = self._create_methods()
        self.methods = sorted(unsorted, key=lambda method: method.get_line_number())
        # The sorted list supersedes the unsorted cache
        self.unsorted_methods = None
        return self.methods

    def get_fields(self):
        """Return DexField objects for the static fields followed by the
        instance fields (cached)."""
        if self.fields is None:
            class_data = self.class_def.class_data
            self.fields = []
            for encoded_field in class_data.static_fields:
                self.fields.append(DexField(self, encoded_field, False))
            for encoded_field in class_data.instance_fields:
                self.fields.append(DexField(self, encoded_field, True))
        return self.fields

    def get_super_cls_name(self):
        """Return the type name of the superclass."""
        return self.get_dex().get_typename(self.class_def.superclass_idx)

    def get_method_mapping(self):
        """Return a nested dict {name_in_file: {pretty_proto: DexMethod}}.

        Raises:
            Exception: if two methods share the same name and prototype.
        """
        if self.method_mapping is None:
            self.method_mapping = {}

            def insert_method(encoded_method, virtual):
                method = DexMethod(self, encoded_method, virtual)
                name = method.get_name_in_file()
                proto = method.get_pretty_proto()
                name_mapping = self.method_mapping.setdefault(name, {})
                if proto in name_mapping:
                    raise Exception(
                        "Unexpected duplicate method found: {}".format(method)
                    )
                name_mapping[proto] = method

            for encoded_method in self.class_def.class_data.direct_methods:
                insert_method(encoded_method, False)
            for encoded_method in self.class_def.class_data.virtual_methods:
                insert_method(encoded_method, True)
        return self.method_mapping

    def get_raw_access_flags(self):
        """Return str() of the class_def's raw access_flags value."""
        return str(self.class_def.access_flags)

    def find_method(self, method_name, proto):
        """Return the method with the given in-file name and pretty proto,
        or None when there is no such method."""
        methods = self.get_method_mapping()
        if method_name not in methods:
            return None
        return methods[method_name].get(proto, None)

    def get_line_number(self):
        """Return the smallest positive line number among the non-abstract
        methods, or 0 when there is none."""
        method_line_numbers = [
            method.get_line_number()
            for method in self.get_methods()
            if not method.is_abstract() and method.get_line_number() > 0
        ]
        if not method_line_numbers:
            return 0
        return min(method_line_numbers)
class DexField:
    """Encapsulates a field within a DEX file."""

    def __init__(self, dex_class, encoded_field, is_instance_field):
        """Wrap encoded_field, a field declared by dex_class.

        is_instance_field records whether the field came from the instance
        field list (True) or the static field list (False).
        """
        self.dex_class = dex_class
        self.encoded_field = encoded_field
        self.is_instance_field = is_instance_field
        # Lazily resolved, cached lookups
        self.field_id = None
        self.name_in_file = None
        self.name = None

    def __len__(self):
        """Return len(encoded_field), or 0 when it is falsy."""
        encoded = self.encoded_field
        if not encoded:
            return 0
        return len(encoded)

    def get_signature(self):
        """Return "<class name>.<field name in file>:<field type>"."""
        owner = self.get_class().get_name()
        raw_name = self.get_name_in_file()
        type_name = self.get_type()
        qualified = owner + "." + raw_name
        return qualified + ":" + type_name

    def get_type(self):
        """Return the type name of the field as reported by the DEX file."""
        dex = self.get_dex()
        return dex.get_typename(self.get_field_id().type_idx)

    def get_field_id(self):
        """Get the field_id for this field (looked up once, then cached)."""
        cached = self.field_id
        if cached is None:
            cached = self.get_dex().get_field_id(self.encoded_field.field_idx)
            self.field_id = cached
        return cached

    def get_field_index(self):
        """Get the field index (field_idx) stored in the encoded_field."""
        encoded = self.encoded_field
        return encoded.field_idx

    def get_dex(self):
        """Return the DEX file object, obtained from the owning class."""
        owner = self.dex_class
        return owner.get_dex()

    def get_name_in_file(self):
        """Returns the name of the field as it is known in the current DEX
        file (no proguard remapping)"""
        if self.name_in_file is not None:
            return self.name_in_file
        dex = self.get_dex()
        self.name_in_file = dex.get_string(self.get_field_id().name_idx)
        return self.name_in_file

    def get_name(self):
        """Return the field name, demangled through the DEX file's
        demangle_class_method_name() when possible, else the in-file name.
        The result is cached."""
        if self.name is not None:
            return self.name
        owner_mangled = self.get_class().get_mangled_name()
        raw_name = self.get_name_in_file()
        resolved = None
        if owner_mangled and raw_name:
            resolved = self.get_dex().demangle_class_method_name(
                owner_mangled, raw_name
            )
        self.name = raw_name if resolved is None else resolved
        return self.name

    def get_class(self):
        """Return the class object that owns this field."""
        owner = self.dex_class
        return owner

    def is_public(self):
        """Return True when the raw access_flags has the PUBLIC bit set."""
        raw_flags = self.encoded_field.access_flags
        return bool(raw_flags & AccessFlags.PUBLIC)

    def is_private(self):
        """Return True when the raw access_flags has the PRIVATE bit set."""
        raw_flags = self.encoded_field.access_flags
        return bool(raw_flags & AccessFlags.PRIVATE)

    def is_protected(self):
        """Return True when the raw access_flags has the PROTECTED bit set."""
        raw_flags = self.encoded_field.access_flags
        return bool(raw_flags & AccessFlags.PROTECTED)

    def get_raw_access_flags(self):
        """Return str() of the encoded field's raw access_flags value."""
        raw_flags = self.encoded_field.access_flags
        return str(raw_flags)
def demangle_classname(mangled):
    """Convert "L<name/with/slashes>;" into "<name.with.dots>:".

    Any other input (empty, too short, or not in L...; form) is treated as
    already demangled and returned unchanged.
    """
    is_mangled = (
        mangled and len(mangled) > 2 and mangled[0] == "L" and mangled[-1] == ";"
    )
    if not is_mangled:
        # Already demangled
        return mangled
    inner = mangled[1:-1]
    return inner.replace("/", ".") + ":"
def mangle_classname(demangled):
    """Convert "<name.with.dots>" into "L<name/with/slashes>;".

    Input that is empty, at most two characters long, or already in L...;
    form is returned unchanged.
    """
    if not demangled or len(demangled) <= 2:
        return demangled
    if demangled[0] == "L" and demangled[-1] == ";":
        # Already mangled
        return demangled
    return "L" + demangled.replace(".", "/") + ";"
class File:
    """Represents a DEX (Dalvik Executable) file"""

    def __init__(
        self, path, file_like=None, proguard_path=None, use_bytecode_format=False
    ):
        """Open the DEX file and parse its header; all tables load lazily.

        Args:
            path: path of the DEX file; opened in binary mode when file_like
                is None.
            file_like: optional already-open binary file object to read from.
            proguard_path: optional path of a proguard mapping used for
                demangling; ignored when it does not exist.
            use_bytecode_format: when True, get_typename() returns raw type
                descriptors instead of Java-style names.
        """
        self.path = path
        self.proguard = None
        if proguard_path and os.path.exists(proguard_path):
            self.proguard = Progard(proguard_path)
        if file_like is None:
            file_like = open(path, "rb")  # noqa: P201
        self.use_bytecode_format = use_bytecode_format
        self.data = file_extract.FileExtract(file_like, "=", 4)
        self.header = header_item(self.data)
        self.map_list = None
        self.string_ids = None
        self.type_ids = None
        self.proto_ids = None
        self.field_ids = None
        self.method_ids = None
        self.class_defs = None
        self.classes = None
        self.unsorted_classes = None
        self.call_site_ids = None
        self.method_handle_items = None
        self.code_items = None
        self.code_off_to_code_item_idx = {}
        self.strings = None
        self.call_sites = None
        self.dex_classes = {}
        self.debug_info_items = None
        self.debug_info_items_total_size = None

    def demangle_class_name(self, cls_mangled):
        """Given a mangled type name as it would appear in a DEX file like
        "LX/JxK;", return the proguard lookup result for it. Returns None
        when there is no proguard file or the name is empty."""
        if self.proguard:
            cls_demangled = demangle_classname(cls_mangled)
            if cls_demangled:
                return self.proguard.lookup_class(cls_demangled)
        return None

    def demangle_class_method_name(self, cls_mangled, method_name):
        """Return the proguard lookup result for method_name in the given
        class, or None without a proguard file or class name."""
        if self.proguard:
            cls_demangled = demangle_classname(cls_mangled)
            if cls_demangled:
                return self.proguard.lookup_method(cls_demangled, method_name)
        return None

    def get_map_list(self):
        """Return the map_list found at header.map_off (parsed once)."""
        if self.map_list is None:
            self.data.push_offset_and_seek(self.header.map_off)
            self.map_list = map_list(self.data)
            self.data.pop_offset_and_seek()
        return self.map_list

    def get_map_tuple(self, type_code):
        """Return (size, offset) of the first map item whose type equals
        type_code, or (0, 0) when there is none."""
        for item in self.get_map_list().list:
            if item.type == type_code:
                return (item.size, item.offset)
        return (0, 0)

    def get_debug_info_items_and_total_size(self):
        """Return (debug_info_items, total_size_in_bytes), or (None, None)
        when the map list has no debug info section."""
        if self.debug_info_items is None:
            (size, offset) = self.get_map_tuple(TypeCode.DEBUG_INFO_ITEM)
            if size == 0 or offset == 0:
                return (None, None)
            self.data.push_offset_and_seek(offset)
            self.debug_info_items = []
            for _ in range(size):
                item = debug_info_item(self.data)
                item.get_ops(reset_offset=False)
                self.debug_info_items.append(item)
            # Everything consumed since the section start is debug info
            self.debug_info_items_total_size = self.data.tell() - offset
            self.data.pop_offset_and_seek()
        return (self.debug_info_items, self.debug_info_items_total_size)

    def find_class(self, class_ref):
        """Return the class identified by class_ref, or None.

        class_ref is either a class name string (mangled or demangled) or an
        integer type index.
        """
        class_idx = class_ref
        if isinstance(class_ref, str):
            # Make sure the string is in 'L' <classname-with-slashes> ';'
            class_mangled = mangle_classname(class_ref)
            class_str_idx = self.find_string_idx(class_mangled)
            if class_str_idx >= 0:
                class_idx = self.find_type_idx(class_str_idx)
        if isinstance(class_idx, numbers.Integral):
            for cls in self.get_classes(sort=False):
                if cls.class_def.class_idx == class_idx:
                    return cls
        return None

    def find_string_idx(self, match_s):
        """Binary-search the string table for match_s (a str, or an object
        with a .data attribute) and return its index, or -1 if absent."""
        match_key = match_s if isinstance(match_s, str) else match_s.data

        class StringsWrapper:
            """Sequence view exposing each string item's .data for bisect."""

            def __init__(self, base):
                self._base = base

            def __getitem__(self, idx):
                return self._base[idx].data

            def __len__(self):
                return len(self._base)

        bisect_against = StringsWrapper(self.get_strings())
        i = bisect.bisect_left(bisect_against, match_key)
        if i != len(bisect_against) and bisect_against[i] == match_key:
            return i
        return -1

    def get_string(self, index):
        """Return the hex-escaped string at index, or None if out of range."""
        strings = self.get_strings()
        if index < len(strings):
            return file_extract.hex_escape(strings[index].data)
        return None

    def get_raw_typename(self, type_id):
        """Return the raw descriptor string for type_id, or None if the
        index is out of range."""
        types = self.get_type_ids()
        if type_id < len(types):
            return self.get_string(types[type_id])
        return None

    def get_typename(self, type_id):
        """Return a Java-style type name for type_id (e.g. "int[]" or
        "java.lang.String"), or the raw descriptor when use_bytecode_format
        is set. Returns None when the type index is out of range."""
        raw_typename = self.get_raw_typename(type_id)
        if self.use_bytecode_format:
            return raw_typename
        if raw_typename is None:
            return None
        # Each leading "[" adds one array dimension
        array_level = 0
        for c in raw_typename:
            if c == "[":
                array_level += 1
            else:
                break
        raw_prefix = raw_typename[array_level]
        raw_base_typename = raw_typename[array_level + 1 :]
        if raw_prefix == "L":
            assert raw_base_typename[-1] == ";"
            return raw_base_typename[:-1].replace("/", ".") + "[]" * array_level
        prefix_to_typename = {
            "V": "void",
            "Z": "boolean",
            "B": "byte",
            "S": "short",
            "C": "char",
            "I": "int",
            "J": "long",
            "F": "float",
            "D": "double",
        }
        return prefix_to_typename[raw_prefix] + "[]" * array_level

    def get_string_ids(self):
        """Return the list of uint32 string_id values (parsed once)."""
        if self.string_ids is None:
            self.string_ids = []
            self.data.push_offset_and_seek(self.header.string_ids_off)
            for _ in range(self.header.string_ids_size):
                self.string_ids.append(self.data.get_uint32())
            self.data.pop_offset_and_seek()
        return self.string_ids

    def get_type_ids(self):
        """Return the list of uint32 type_id values (parsed once)."""
        if self.type_ids is None:
            self.type_ids = []
            self.data.push_offset_and_seek(self.header.type_ids_off)
            for _ in range(self.header.type_ids_size):
                self.type_ids.append(self.data.get_uint32())
            self.data.pop_offset_and_seek()
        return self.type_ids

    def get_proto_ids(self):
        """Return the list of proto_id_item objects (parsed once)."""
        if self.proto_ids is None:
            self.proto_ids = []
            self.data.push_offset_and_seek(self.header.proto_ids_off)
            for _ in range(self.header.proto_ids_size):
                self.proto_ids.append(proto_id_item(self.data, self))
            self.data.pop_offset_and_seek()
        return self.proto_ids

    def get_proto_id(self, proto_idx):
        """Return the proto_id at proto_idx, or None if out of range."""
        proto_ids = self.get_proto_ids()
        if 0 <= proto_idx < len(proto_ids):
            return proto_ids[proto_idx]
        return None

    def get_proto_shorty(self, proto_idx):
        """Return the shorty string of the proto at proto_idx, or None when
        the index is out of range."""
        proto = self.get_proto_id(proto_idx)
        if proto is None:
            return None
        return self.get_string(proto.shorty_idx)

    def get_proto_string(self, proto_idx):
        """Return "(<param>, <param>, ...)<return type>" for proto_idx."""
        proto = self.get_proto_id(proto_idx)
        return_type = self.get_typename(proto.return_type_idx)
        params = proto.get_parameters()
        return "(%s)%s" % (
            ", ".join(
                [self.get_typename(type_idx) for type_idx in params.list]
                if params
                else []
            ),
            return_type,
        )

    def get_field_ids(self):
        """Return the list of field_id_item objects (parsed once)."""
        if self.field_ids is None:
            self.field_ids = []
            self.data.push_offset_and_seek(self.header.field_ids_off)
            for _ in range(self.header.field_ids_size):
                self.field_ids.append(field_id_item(self.data, self))
            self.data.pop_offset_and_seek()
        return self.field_ids

    def get_field_id(self, field_ref):
        """field_ref can be one of:
        - a encoded_field object
        - integer field index

        Returns the matching field_id_item, or None when the index is out of
        range or the file has no field ids.

        Raises:
            ValueError: if field_ref is of any other type.
        """
        field_ids = self.get_field_ids()
        if field_ids:
            if isinstance(field_ref, encoded_field):
                if field_ref.field_idx < len(field_ids):
                    # Index with field_idx, the attribute the bounds check
                    # above validated (was the nonexistent field_id).
                    return field_ids[field_ref.field_idx]
            elif isinstance(field_ref, numbers.Integral):
                if field_ref < len(field_ids):
                    return field_ids[field_ref]
            else:
                raise ValueError("invalid field_ref type %s" % (type(field_ref)))
        return None

    def get_method_ids(self):
        """Return the list of method_id_item objects (parsed once)."""
        if self.method_ids is None:
            self.method_ids = []
            self.data.push_offset_and_seek(self.header.method_ids_off)
            for _ in range(self.header.method_ids_size):
                self.method_ids.append(method_id_item(self.data, self))
            self.data.pop_offset_and_seek()
        return self.method_ids

    def find_method_ids(self, method_name, class_ref=None):
        """Return the list of method ids named method_name.

        When class_ref resolves to a class, only that class's methods are
        returned. An unknown name yields an empty list.
        """
        dex_class = None
        if class_ref is not None:
            dex_class = self.find_class(class_ref)
        matches = []  # Return a list of matching methods
        method_ids = self.get_method_ids()
        if not method_ids:
            return matches
        name_idx = self.find_string_idx(method_name)
        # find_string_idx() reports a miss as -1; index 0 is a valid string
        if name_idx < 0:
            return matches
        for method_id in method_ids:
            if method_id.name_idx == name_idx:
                if dex_class:
                    if method_id.class_idx != dex_class.class_def.class_idx:
                        continue
                matches.append(method_id)
        return matches

    def find_method_id_by_code_offset(self, code_off):
        """Return the first truthy result of
        class_def.find_encoded_method_by_code_off(code_off) over all class
        defs, or None."""
        for class_def in self.get_class_defs():
            method_id = class_def.find_encoded_method_by_code_off(code_off)
            if method_id:
                return method_id
        return None

    def get_method_id(self, method_ref):
        """method_ref can be one of:
        - a encoded_method object
        - integer method index

        Returns the matching method_id_item, or None when the index is out
        of range or the file has no method ids.

        Raises:
            ValueError: if method_ref is of any other type.
        """
        method_ids = self.get_method_ids()
        if method_ids:
            if isinstance(method_ref, encoded_method):
                if method_ref.method_idx < len(method_ids):
                    return method_ids[method_ref.method_idx]
            elif isinstance(method_ref, numbers.Integral):
                if method_ref < len(method_ids):
                    return method_ids[method_ref]
            else:
                raise ValueError("invalid method_ref type %s" % (type(method_ref)))
        return None

    # def get_call_site(self, idx):
    #     call_site_ids = self.get_call_site_ids()
    #     if idx >= len(call_site_ids):
    #         return None
    #     if self.call_sites[idx] is None:
    #         self.data.push_offset_and_seek(call_site_ids[idx])
    #         self.call_sites[idx] = call_site_item(self.data)
    #         self.data.pop_offset_and_seek()
    #     return self.call_sites[idx]

    def get_call_site_ids(self):
        """Return the list of uint32 call site ids (parsed once); also
        initializes self.call_sites with one None slot per id."""
        if self.call_site_ids is None:
            self.call_site_ids = []
            self.call_sites = []
            (size, offset) = self.get_map_tuple(TypeCode.CALL_SITE_ID_ITEM)
            self.data.push_offset_and_seek(offset)
            for _ in range(size):
                self.call_site_ids.append(self.data.get_uint32())
                self.call_sites.append(None)
            self.data.pop_offset_and_seek()
        return self.call_site_ids

    def get_method_handle_items(self):
        """Return the list of method_handle_item objects (parsed once)."""
        if self.method_handle_items is None:
            self.method_handle_items = []
            (size, offset) = self.get_map_tuple(TypeCode.METHOD_HANDLE_ITEM)
            self.data.push_offset_and_seek(offset)
            for _ in range(size):
                self.method_handle_items.append(method_handle_item(self.data))
            self.data.pop_offset_and_seek()
        return self.method_handle_items

    def get_code_items(self):
        """Return the list of code_item objects (parsed once) and fill the
        code offset -> code item index lookup table."""
        if self.code_items is None:
            self.code_items = []
            (size, offset) = self.get_map_tuple(TypeCode.CODE_ITEM)
            self.data.push_offset_and_seek(offset)
            for i in range(size):
                self.data.align_to(4)
                item = code_item(self.data, self)
                self.code_items.append(item)
                self.code_off_to_code_item_idx[item.get_offset()] = i
            self.data.pop_offset_and_seek()
        return self.code_items

    def report_code_duplication(self):
        """Print groups of methods whose code items have identical insns,
        followed by one dump of the shared code per group."""
        code_to_code_items = {}
        code_items = self.get_code_items()
        if code_items:
            for item in code_items:
                code_to_code_items.setdefault(item.insns, []).append(item)
            for duplicates in code_to_code_items.values():
                if len(duplicates) <= 1:
                    continue
                print("-" * 72)
                print("The following methods have the same code:")
                last_method = None
                for item in duplicates:
                    method = self.find_method_from_code_off(item.get_offset())
                    if method is None:
                        # No method references this code item; skip it
                        # rather than failing on a None attribute access.
                        continue
                    print("virtual" if method.is_virtual else "direct", end=" ")
                    print(method.get_qualified_name())
                    last_method = method
                # Dump the code once for all methods
                if last_method is not None:
                    last_method.dump_code()

    def get_class_def_index_from_offset(self, class_def_offset):
        """Return the index of the class def located at class_def_offset,
        or -1 when there is none."""
        for (i, class_def) in enumerate(self.get_class_defs()):
            if class_def.get_offset() == class_def_offset:
                return i
        return -1

    def get_code_item_index_from_code_off(self, code_off):
        """Return the index of the code item at code_off, or -1."""
        # Make sure the code items are created
        self.get_code_items()
        return self.code_off_to_code_item_idx.get(code_off, -1)

    def find_code_item(self, code_off):
        """Return the code item located at code_off.

        Raises:
            ValueError: if no code item starts at code_off.
        """
        code_item_idx = self.get_code_item_index_from_code_off(code_off)
        if code_item_idx >= 0:
            return self.get_code_items()[code_item_idx]
        raise ValueError("invalid code item offset %#8.8x" % code_off)

    def find_method_from_code_off(self, code_off):
        """Return the method whose code offset equals code_off, or None.
        A code_off of 0 never matches."""
        if code_off == 0:
            return None
        for cls in self.get_classes(sort=False):
            for method in cls.get_methods(sort=False):
                if method.get_code_offset() == code_off:
                    return method
        return None

    def get_class_defs(self):
        """Return the list of class_def_item objects (parsed once)."""
        if self.class_defs is None:
            self.class_defs = []
            self.data.push_offset_and_seek(self.header.class_defs_off)
            for _ in range(self.header.class_defs_size):
                class_def = class_def_item(self.data, self)
                self.class_defs.append(class_def)
            self.data.pop_offset_and_seek()
        return self.class_defs

    def get_classes(self, sort=True):
        """Return the DexClass objects of this file.

        With sort=True (the default) the list is sorted by line number and
        cached. With sort=False the classes are returned in class_def order,
        unless a sorted list was already built, in which case that list is
        returned.
        """
        if self.classes is not None:
            return self.classes
        if self.unsorted_classes is not None and not sort:
            return self.unsorted_classes
        if not sort:
            self.unsorted_classes = [
                DexClass(self, class_def) for class_def in self.get_class_defs()
            ]
            return self.unsorted_classes
        if self.unsorted_classes is not None:
            self.classes = sorted(
                self.unsorted_classes, key=lambda cls: cls.get_line_number()
            )
            self.unsorted_classes = None
        else:
            self.classes = sorted(
                (DexClass(self, class_def) for class_def in self.get_class_defs()),
                key=lambda cls: cls.get_line_number(),
            )
        return self.classes

    def get_strings(self):
        """Return the list of string_data_item objects, one per string id
        (parsed once)."""
        if self.strings is None:
            self.strings = []
            for string_id_item in self.get_string_ids():
                self.data.push_offset_and_seek(string_id_item)
                self.strings.append(string_data_item(self.data))
                self.data.pop_offset_and_seek()
        return self.strings

    def dump_header(self, options, f=sys.stdout):
        """Dump the file header to f."""
        self.header.dump(f=f)
        f.write("\n")

    def dump_map_list(self, options, f=sys.stdout):
        """Dump the map list to f."""
        self.get_map_list().dump(f=f)
        f.write("\n")

    def dump_string_ids(self, options, f=sys.stdout):
        """Dump every string id together with its string data to f."""
        string_ids = self.get_string_ids()
        if string_ids:
            f.write("string_ids:\n")
            for (i, item) in enumerate(self.get_strings()):
                f.write("[%4u] %#8.8x ( " % (i, string_ids[i]))
                item.dump(f=f)
                f.write(")\n")

    def dump_type_ids(self, options, f=sys.stdout):
        """Dump every type id together with its descriptor string to f."""
        type_ids = self.get_type_ids()
        if type_ids:
            f.write("\ntype_ids:\n DESCRIPTOR_IDX\n")
            for (i, item) in enumerate(type_ids):
                f.write('[%4u] %#8.8x ("%s")\n' % (i, item, self.get_string(item)))

    def find_type_idx(self, class_str_idx):
        """Binary-search the type ids for class_str_idx and return its
        index, or -1 if absent."""
        types = self.get_type_ids()
        i = bisect.bisect_left(types, class_str_idx)
        if i != len(types) and types[i] == class_str_idx:
            return i
        return -1

    def find_class_def_by_type_index(self, class_idx):
        """Return the class def whose class_idx equals class_idx, or None."""
        for class_def in self.get_class_defs():
            if class_def.class_idx == class_idx:
                return class_def
        return None

    def dump_proto_ids(self, options, f=sys.stdout):
        """Dump the proto id table to f."""
        proto_ids = self.get_proto_ids()
        if proto_ids:
            f.write("\nproto_ids:\n")
            f.write(" " * (6 + 1))
            f.write(proto_id_item.get_table_header())
            for (i, item) in enumerate(proto_ids):
                f.write("[%4u] " % (i))
                item.dump(f=f, print_name=False)
                f.write("%s\n" % self.get_proto_string(i))

    def dump_field_ids(self, options, f=sys.stdout):
        """Dump the field id table to f."""
        field_ids = self.get_field_ids()
        if field_ids:
            f.write("\nfield_ids:\n")
            f.write(" " * (6 + 1))
            f.write(field_id_item.get_table_header())
            for (i, item) in enumerate(field_ids):
                f.write("[%4u] " % (i))
                item.dump(f=f, print_name=False)
                f.write(
                    " %s.%s:%s\n"
                    % (
                        self.get_typename(item.class_idx),
                        self.get_string(item.name_idx),
                        self.get_typename(item.type_idx),
                    )
                )

    def dump_class_method_ids(self, options, f=sys.stdout):
        """Dump the method ids to f, grouped by defining class."""
        method_ids = self.get_method_ids()
        if not method_ids:
            return
        f.write("\nmethod_ids:\n")
        f.write(" " * (6 + 1))
        f.write(method_id_item.get_table_header())
        for dex_class in self.get_classes():
            f.write("\nclass %s\n" % dex_class.get_name())
            for dex_method in dex_class.get_methods():
                method_idx = dex_method.encoded_method.method_idx
                method_item = method_ids[method_idx]
                f.write("[%4u] " % method_idx)
                method_item.dump(f=f, print_name=False)
                f.write(
                    " %s.%s:%s (%s)\n"
                    % (
                        self.get_typename(method_item.class_idx),
                        self.get_string(method_item.name_idx),
                        self.get_proto_string(method_item.proto_idx),
                        str(dex_method.encoded_method.get_access_flags()),
                    )
                )

    def dump_class_defs(self, options, f=sys.stdout):
        """Dump the class def table to f."""
        class_defs = self.get_class_defs()
        if class_defs:
            f.write("\nclass_defs:\n")
            f.write(" " * (6 + 1))
            f.write(class_def_item.get_table_header())
            for (i, item) in enumerate(class_defs):
                f.write("[%4u] " % (i))
                item.dump(f=f, print_name=False)
                f.write(
                    " (%s, %s, (%s))"
                    % (
                        self.get_typename(item.class_idx),
                        self.get_typename(item.superclass_idx),
                        str(item.get_access_flags()),
                    )
                )
                f.write("\n")

    def dump_call_site_ids(self, options, f=sys.stdout):
        """Dump the call site ids to f."""
        call_site_ids = self.get_call_site_ids()
        if call_site_ids:
            f.write("\ncall_site_ids:\n")
            f.write(" " * (6 + 1))
            for (i, item) in enumerate(call_site_ids):
                f.write("[%4u] %#8.8x\n" % (i, item))

    def dump_method_handle_items(self, options, f=sys.stdout):
        """Dump the method handle items to f."""
        method_handle_items = self.get_method_handle_items()
        if method_handle_items:
            f.write("\nmethod_handle_items:\n")
            f.write(" " * (6 + 1))
            for (i, item) in enumerate(method_handle_items):
                f.write("[%4u] " % (i))
                item.dump(f=f)
                f.write("\n")

    def dump_code(self, options, f=sys.stdout):
        """Dump every class, and (when code, debug info or everything is
        requested) each of its methods, to f.

        NOTE(review): the blank line is written once per class; the nesting
        was reconstructed from flattened source.
        """
        classes = self.get_classes()
        if classes:
            for cls in classes:
                if options.skip_abstract and cls.is_abstract():
                    continue
                cls.dump(options, f=f)
                methods = cls.get_methods()
                if options.dump_code or options.dump_debug_info or options.dump_all:
                    for method in methods:
                        method.dump(options, f=f)
                f.write("\n")

    def dump_code_items(self, options, f=sys.stdout):
        """Dump every code item to f."""
        code_items = self.get_code_items()
        if code_items:
            for (i, item) in enumerate(code_items):
                f.write("code_item[%u]:\n" % (i))
                item.dump(f=f)

    def dump_debug_info_items(self, options, f=sys.stdout):
        """Dump every debug info item and the section's total size to f."""
        (debug_info_items, size) = self.get_debug_info_items_and_total_size()
        if debug_info_items:
            for item in debug_info_items:
                item.dump_debug_info(f=f)
            f.write("Total TYPE_DEBUG_INFO_ITEM size: {}\n\n".format(size))

    def dump_structure(self, options, f=sys.stdout):
        """Write one summary line per class followed by its method ("M")
        and field ("F") signatures with their raw access flags.

        With options.public_only, non-public classes, methods and fields
        are left out.
        """
        public_only = options.public_only
        classes = self.get_classes()
        for cls in classes:
            if public_only and not cls.is_public():
                continue
            methods = cls.get_methods()
            method_signature_to_access = {}
            for method in methods:
                if public_only and not method.is_public():
                    continue
                method_signature_to_access[
                    method.get_signature()
                ] = method.get_raw_access_flags()
            fields = cls.get_fields()
            field_signature_to_access = {}
            for field in fields:
                if public_only and not field.is_public():
                    continue
                field_signature_to_access[
                    field.get_signature()
                ] = field.get_raw_access_flags()
            f.write(
                "%s %s %s %d %d\n"
                % (
                    cls.get_name(),
                    cls.get_raw_access_flags(),
                    cls.get_super_cls_name(),
                    len(method_signature_to_access),
                    len(field_signature_to_access),
                )
            )
            for meth_signature, meth_access_flags in method_signature_to_access.items():
                f.write(" M %s %s\n" % (meth_signature, meth_access_flags))
            for (
                field_signature,
                field_access_flags,
            ) in field_signature_to_access.items():
                f.write(" F %s %s\n" % (field_signature, field_access_flags))

    def dump(self, options, f=sys.stdout):
        """Dump every section of the DEX file to f."""
        self.dump_header(options, f)
        f.write("\n")
        self.dump_map_list(options, f)
        self.dump_debug_info_items(options, f)
        self.dump_string_ids(options, f)
        self.dump_type_ids(options, f)
        self.dump_proto_ids(options, f)
        self.dump_field_ids(options, f)
        # This class defines dump_class_method_ids(); there is no
        # dump_method_ids() to call.
        self.dump_class_method_ids(options, f)
        self.dump_class_defs(options, f)
        self.dump_call_site_ids(options, f)
        self.dump_method_handle_items(options, f)
        self.dump_code(options, f)
        self.dump_code_items(options, f)
def sign_extending(value, bit_width):
    """Interpret value as a bit_width-bit two's complement number.

    When the sign bit (bit bit_width - 1) is set, 2**bit_width is subtracted
    from value; otherwise value is returned unchanged.
    """
    sign_bit = 1 << (bit_width - 1)
    if not value & sign_bit:
        return value
    return value - (1 << bit_width)  # 2s complement
def get_signed_hex_offset_as_str(signed_offset, width):
    """Format signed_offset as a sign character plus zero-padded hex digits.

    width selects the number of hex digits (2, 4 or 8); the magnitude is
    masked to that many digits.

    Raises:
        ValueError: if width is not 2, 4 or 8.
    """
    sign = "-" if signed_offset < 0 else "+"
    magnitude = abs(signed_offset) if signed_offset < 0 else signed_offset
    formats = {
        2: ("%2.2x", 0xFF),
        4: ("%4.4x", 0xFFFF),
        8: ("%8.8x", 0xFFFFFFFF),
    }
    if width not in formats:
        raise ValueError("only sizes of 2 4 or 8 are supported")
    fmt, mask = formats[width]
    return sign + fmt % (magnitude & mask)
class Opcode(object):
    """Base class for decoded opcodes.

    The methods here read attributes that this class does not set itself:
    `ops` (a mapping from opcode value to name), `num_code_units` and `regs`
    (the list of register numbers).
    """

    def __init__(self, inst):
        """Remember inst, the instruction this opcode was decoded from."""
        self.inst = inst

    def check_encoding(self, f=sys.stdout):
        """Verify that this instruction can't be encoded more efficiently"""
        return 0  # Return zero to indicate we can't save any bytes

    def new_encoding(self, f=sys.stdout):
        """Look for bytes we can save by making new opcodes that are encoded
        as unsigned, or other optimizations"""
        return 0  # Return zero to indicate we can't save any bytes

    def get_op(self):
        """Return the opcode value of the underlying instruction."""
        return self.inst.get_op()

    def get_name(self):
        """Return the name registered in self.ops for this opcode value."""
        op = self.get_op()
        return self.ops[op]

    def get_num_code_units(self):
        """Return the value of self.num_code_units."""
        return self.num_code_units

    def regs_are_sequential(self):
        """Return True when every register is exactly one higher than the
        register before it. Zero or one register counts as sequential."""
        # Compare each register with its immediate predecessor; the previous
        # loop kept comparing against the first register only.
        return all(
            prev_reg + 1 == curr_reg
            for prev_reg, curr_reg in zip(self.regs, self.regs[1:])
        )
class Opcode00(Opcode):
    """The "nop" opcode and the pseudo-instruction payloads that share its
    opcode value.

    The AA byte of the instruction (stored as self.nature) selects the kind:
    0 = plain nop, 1 = packed-switch-payload, 2 = sparse-switch-payload,
    3 = fill-array-data-payload.
    """

    ops = {0x00: "nop"}
    num_code_units = 1
    max_regs = 0
    extra_data = "x"

    def __init__(self, inst, code_units):
        """Decode the nop or payload, consuming payload data from code_units.

        Raises:
            ValueError: if the nature value is not 0, 1, 2 or 3.
        """
        Opcode.__init__(self, inst)
        self.nature = inst.get_AA()
        if self.nature == 0:
            pass  # NOP
        elif self.nature == 1:
            # packed-switch: size, first key, then one target per entry
            self.size = code_units.get_code_unit()
            self.first_key = code_units.get_int()
            self.targets = []
            for _ in range(self.size):
                self.targets.append(code_units.get_int())
        elif self.nature == 2:
            # sparse-switch: size, all keys, then all targets
            self.size = code_units.get_code_unit()
            self.keys = []
            self.targets = []
            for _ in range(self.size):
                self.keys.append(code_units.get_int())
            for _ in range(self.size):
                self.targets.append(code_units.get_int())
        elif self.nature == 3:
            # fill-array-data: element width, element count, packed data
            self.element_width = code_units.get_code_unit()
            self.size = code_units.get_uint()
            # Round up to whole code units using integer arithmetic
            num_code_units = (self.size * self.element_width + 1) // 2
            encoder = file_extract.FileEncode(BytesIO(), "little", 4)
            for _ in range(num_code_units):
                encoder.put_uint16(code_units.get_code_unit())
            encoder.seek(0)
            self.data = encoder.file.getvalue()
        else:
            raise ValueError("add support for NOP nature %u" % (self.nature))

    def get_name(self):
        """Return "nop" or the payload name selected by self.nature.

        Raises:
            ValueError: if the nature value is not 0, 1, 2 or 3.
        """
        if self.nature == 0:
            return self.ops[0]
        elif self.nature == 1:
            return "packed-switch-payload"
        elif self.nature == 2:
            return "sparse-switch-payload"
        elif self.nature == 3:
            return "fill-array-data-payload"
        else:
            raise ValueError("add support for NOP nature %u" % (self.nature))

    def get_num_code_units(self):
        """Return the number of 16-bit code units this nop/payload occupies.

        Raises:
            ValueError: if the nature value is not 0, 1, 2 or 3.
        """
        if self.nature == 0:
            return 1
        elif self.nature == 1:
            op_count = 1
            size_count = 1
            first_key_count = 2
            keys_count = self.size * 2
            return op_count + size_count + first_key_count + keys_count
        elif self.nature == 2:
            op_count = 1
            size_count = 1
            keys_and_targets_count = self.size * 4
            return op_count + size_count + keys_and_targets_count
        elif self.nature == 3:
            # Per the Dalvik bytecode format the payload takes
            # (size * element_width + 1) / 2 + 4 code units: ident (1),
            # element_width (1), size (2), then the data rounded up to whole
            # code units. The data length in bytes must not be used here.
            op_count = 1
            element_width_count = 1
            size_count = 2
            data_count = (self.size * self.element_width + 1) // 2
            return op_count + element_width_count + size_count + data_count
        else:
            raise ValueError("add support for NOP nature %u" % (self.nature))

    def dump(self, f=sys.stdout, context=None):
        """Write the nop name or a table of the payload contents to f."""
        if self.nature == 0:
            f.write("%s" % (self.get_name()))
        elif self.nature == 1:
            f.write("packed-switch-payload\n")
            f.write("INDEX KEY TARGET\n===== --------- ---------\n")
            for (i, target) in enumerate(self.targets):
                # Keys are consecutive, starting at first_key
                f.write("[%4u] %+8.8x %+8.8x\n" % (i, self.first_key + i, target))
        elif self.nature == 2:
            f.write("sparse-switch-payload\n")
            f.write("INDEX KEY TARGET\n===== --------- ---------\n")
            for (i, key) in enumerate(self.keys):
                f.write("[%4u] %+8.8x %+8.8x\n" % (i, key, self.targets[i]))
        elif self.nature == 3:
            f.write(
                "fill-array-data-payload (elem_width = %u, size = %u)\n"
                % (self.element_width, self.size)
            )
            file_extract.dump_memory(0, self.data, self.element_width, f)

    def emulate(self, emulator):
        """Do nothing: a nop or payload has no effect when emulated."""
        pass
class Opcode01(Opcode):
    """Decoder for ``move`` (opcode 0x01), a single code unit instruction."""

    ops = {0x01: "move"}
    num_code_units = 1
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # Both registers come from the first code unit (A and B fields).
        self.regs = [inst.get_A(), inst.get_B()]

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vA, vB`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        f.write("%s v%u, v%u" % (name, self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode02(Opcode):
    """Decoder for ``move/from16`` (opcode 0x02), two code units."""

    ops = {0x02: "move/from16"}
    num_code_units = 2
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # First register: AA field; second register: the whole second unit.
        self.regs = [inst.get_AA(), inst[1]]

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when a plain ``move`` would do; return bytes saved."""
        first = self.regs[0]
        second = self.regs[1]
        if first > UINT4_MAX or second > UINT4_MAX:
            return 0
        f.write('warning: "move/from16" can be encoded as a "move"')
        f.write(" more efficiently as its registers are both <= %u\n" % (UINT4_MAX))
        return 2

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vA, vB`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        f.write("%s v%u, v%u" % (name, self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode03(Opcode):
    """Decoder for ``move/16`` (opcode 0x03), three code units."""

    ops = {0x03: "move/16"}
    num_code_units = 3
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # Each register occupies a full code unit (units 1 and 2).
        self.regs = [inst[1], inst[2]]

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when a shorter move form fits; return bytes saved."""
        first = self.regs[0]
        second = self.regs[1]
        saved = 0
        if first <= UINT4_MAX and second <= UINT4_MAX:
            f.write('warning: "move/16" can be encoded as a "move"')
            f.write(" more efficiently as its registers are both <= %u\n" % (UINT4_MAX))
            saved = 4
        elif first <= UINT8_MAX:
            f.write('warning: "move/16" can be encoded as a "move/from16"')
            f.write(" more efficiently as its first register is <= %u\n" % (UINT8_MAX))
            saved = 2
        return saved

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vA, vB`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        f.write("%s v%u, v%u" % (name, self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode04(Opcode):
    """Decoder for ``move-wide`` (opcode 0x04), a single code unit."""

    ops = {0x04: "move-wide"}
    num_code_units = 1
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # Both registers come from the first code unit (A and B fields).
        self.regs = [inst.get_A(), inst.get_B()]

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vA, vB`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        f.write("%s v%u, v%u" % (name, self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode05(Opcode):
    """Decoder for ``move-wide/from16`` (opcode 0x05), two code units."""

    ops = {0x05: "move-wide/from16"}
    num_code_units = 2
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # First register: AA field; second register: the whole second unit.
        self.regs = [inst.get_AA(), inst[1]]

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when ``move-wide`` would do; return bytes saved."""
        first = self.regs[0]
        second = self.regs[1]
        if first > UINT4_MAX or second > UINT4_MAX:
            return 0
        f.write('warning: "move-wide/from16" can be encoded as a ')
        f.write('"move-wide" more efficiently as its registers are ')
        f.write("both <= %u\n" % (UINT4_MAX))
        return 2

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vA, vB`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        f.write("%s v%u, v%u" % (name, self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode06(Opcode):
    """Decoder for ``move-wide/16`` (opcode 0x06), three code units."""

    ops = {0x06: "move-wide/16"}
    num_code_units = 3
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # Each register occupies a full code unit (units 1 and 2).
        self.regs = [inst[1], inst[2]]

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when a shorter form fits; return bytes saved."""
        first = self.regs[0]
        second = self.regs[1]
        saved = 0
        if first <= UINT4_MAX and second <= UINT4_MAX:
            f.write('warning: "move-wide/16" can be encoded as a "move-wide" ')
            f.write("more efficiently as its registers are both <= %u\n" % (UINT4_MAX))
            saved = 4
        elif first <= UINT8_MAX:
            f.write('warning: "move-wide/16" can be encoded as a ')
            f.write('"move-wide/from16" more efficiently as its first ')
            f.write("register is <= %u\n" % (UINT8_MAX))
            saved = 2
        return saved

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vA, vB`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        f.write("%s v%u, v%u" % (name, self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode07(Opcode):
    """Decoder for ``move-object`` (opcode 0x07), a single code unit."""

    ops = {0x07: "move-object"}
    num_code_units = 1
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # Both registers come from the first code unit (A and B fields).
        self.regs = [inst.get_A(), inst.get_B()]

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vA, vB`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        f.write("%s v%u, v%u" % (name, self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode08(Opcode):
    """Decoder for ``move-object/from16`` (opcode 0x08), two code units."""

    # The mnemonic carries no trailing space, so dump() output is spaced
    # the same way as the other move variants.
    ops = {0x08: "move-object/from16"}
    num_code_units = 2
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        Opcode.__init__(self, inst)
        # First register: AA field; second register: the whole second unit.
        self.regs = [inst.get_AA(), inst[1]]

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when ``move-object`` would do; return bytes saved."""
        if self.regs[0] <= UINT4_MAX and self.regs[1] <= UINT4_MAX:
            f.write('warning: "move-object/from16" can be encoded as a ')
            f.write('"move-object" more efficiently as its registers are ')
            f.write("both <= %u\n" % (UINT4_MAX))
            return 2
        return 0

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vA, vB`` to ``f``; ``context`` is unused."""
        f.write("%s v%u, v%u" % (self.get_name(), self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode09(Opcode):
    """Decoder for ``move-object/16`` (opcode 0x09), three code units."""

    ops = {0x09: "move-object/16"}
    num_code_units = 3
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # Each register occupies a full code unit (units 1 and 2).
        self.regs = [inst[1], inst[2]]

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when a shorter form fits; return bytes saved."""
        first = self.regs[0]
        second = self.regs[1]
        saved = 0
        if first <= UINT4_MAX and second <= UINT4_MAX:
            f.write('warning: "move-object/16" can be encoded as a ')
            f.write('"move-object" more efficiently as its registers ')
            f.write("are both <= %u\n" % (UINT4_MAX))
            saved = 4
        elif first <= UINT8_MAX:
            f.write('warning: "move-object/16" can be encoded as a ')
            f.write('"move-object/from16" more efficiently as its first ')
            f.write("register is <= %u\n" % (UINT8_MAX))
            saved = 2
        return saved

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vA, vB`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        f.write("%s v%u, v%u" % (name, self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode0A_0D(Opcode):
    """Decoder for the ``move-result*`` and ``move-exception`` opcodes."""

    ops = {
        0x0A: "move-result",
        0x0B: "move-result-wide",
        0x0C: "move-result-object",
        0x0D: "move-exception",
    }
    num_code_units = 1
    max_regs = 1
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # The single register is taken from the AA field.
        self.reg = inst.get_AA()

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vAA`` to ``f``; ``context`` is unused."""
        text = "%s v%u" % (self.get_name(), self.reg)
        f.write(text)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode0E(Opcode):
    """Decoder for ``return-void`` (opcode 0x0E); it has no operands."""

    ops = {0x0E: "return-void"}
    num_code_units = 1
    max_regs = 0
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)

    def dump(self, f=sys.stdout, context=None):
        """Write just the mnemonic to ``f``; ``context`` is unused."""
        name = self.get_name()
        f.write("%s" % name)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode0F(Opcode):
    """Decoder for ``return`` (opcode 0x0F), a single code unit."""

    ops = {0x0F: "return"}
    num_code_units = 1
    max_regs = 1
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # The single register is taken from the AA field.
        self.reg = inst.get_AA()

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vAA`` to ``f``; ``context`` is unused."""
        text = "%s v%u" % (self.get_name(), self.reg)
        f.write(text)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode10(Opcode):
    """Decoder for ``return-wide`` (opcode 0x10), a single code unit."""

    ops = {0x10: "return-wide"}
    num_code_units = 1
    max_regs = 1
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # The single register is taken from the AA field.
        self.reg = inst.get_AA()

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vAA`` to ``f``; ``context`` is unused."""
        text = "%s v%u" % (self.get_name(), self.reg)
        f.write(text)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode11(Opcode):
    """Decoder for ``return-object`` (opcode 0x11), a single code unit."""

    ops = {0x11: "return-object"}
    num_code_units = 1
    max_regs = 1
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # The single register is taken from the AA field.
        self.reg = inst.get_AA()

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vAA`` to ``f``; ``context`` is unused."""
        text = "%s v%u" % (self.get_name(), self.reg)
        f.write(text)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode12(Opcode):
    """Decoder for ``const/4`` (opcode 0x12), a single code unit."""

    ops = {0x12: "const/4"}
    num_code_units = 1
    max_regs = 1
    extra_data = "n"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_A()
        # The literal is the top nibble of the first unit, sign extended.
        top_nibble = inst[0] >> 12
        self.imm = sign_extending(top_nibble, 4)

    def dump(self, f=sys.stdout, context=None):
        """Write the register and literal (decimal and hex) to ``f``."""
        fields = (self.get_name(), self.reg, self.imm, self.imm)
        f.write("%s v%u, #int %i // #%#x" % fields)

    def emulate(self, emulator):
        """Store the literal into the destination register of ``emulator``."""
        emulator.write_register(self.reg, self.imm)
class Opcode13(Opcode):
    """Decoder for ``const/16`` (opcode 0x13), two code units."""

    ops = {0x13: "const/16"}
    num_code_units = 2
    max_regs = 1
    extra_data = "s"

    def __init__(self, inst, code_units):
        Opcode.__init__(self, inst)
        self.reg = inst.get_AA()
        # The literal is the second code unit, sign extended from 16 bits.
        self.imm = sign_extending(inst[1], 16)

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when ``const/4`` would do; return bytes saved."""
        if self.reg <= UINT4_MAX and INT4_MIN <= self.imm <= INT4_MAX:
            f.write('warning: "const/16" can be encoded as a "const/4" more ')
            f.write("efficiently as its register is <= %u and " % (UINT4_MAX))
            f.write("(%i <= %i <= %i)\n" % (INT4_MIN, self.imm, INT4_MAX))
            return 2
        return 0

    def new_encoding(self, f=sys.stdout):
        """Report when a hypothetical ``const/u4`` would fit; return bytes saved.

        The proposed opcode stores a 4 bit unsigned offset from +8, so it
        covers the literals 8 through 8 + UINT4_MAX (23) inclusive.
        """
        lowest = INT4_MAX + 1
        highest = lowest + UINT4_MAX
        if self.reg <= UINT4_MAX and lowest <= self.imm <= highest:
            f.write('"const/16" could be encoded as a new "const/u4" stores ')
            f.write("a 4 bit unsigned offset from +8 for a constant range ")
            f.write("of [8-24):\n")
            return 2
        return 0

    def dump(self, f=sys.stdout, context=None):
        """Write the register and literal (decimal and hex) to ``f``."""
        f.write(
            "%s v%u, #int %i // #%#x" % (self.get_name(), self.reg, self.imm, self.imm)
        )

    def emulate(self, emulator):
        """Store the literal into the destination register of ``emulator``."""
        emulator.write_register(self.reg, self.imm)
class Opcode14(Opcode):
    """Decoder for ``const`` (opcode 0x14), three code units."""

    ops = {0x14: "const"}
    num_code_units = 3
    max_regs = 1
    extra_data = "i"

    def __init__(self, inst, code_units):
        Opcode.__init__(self, inst)
        self.reg = inst.get_AA()
        # Stored exactly as decoded (unsigned 32 bit); _signed_imm() gives
        # the signed interpretation needed for range checks.
        self.imm = inst.get_uint32(1)

    def _signed_imm(self):
        """Return ``self.imm`` reinterpreted as a signed 32 bit integer."""
        if self.imm > INT32_MAX:
            return self.imm - (1 << 32)
        return self.imm

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when ``const/16`` would do; return bytes saved.

        The comparison uses the signed value so that small negative
        literals (stored here as large unsigned numbers) are recognised.
        """
        imm = self._signed_imm()
        if self.reg <= UINT8_MAX and INT16_MIN <= imm <= INT16_MAX:
            f.write('warning: "const" can be encoded as a "const/16" more ')
            f.write("efficiently as its register is <= %u " % (UINT8_MAX))
            f.write("and (%i <= %i <= %i)\n" % (INT16_MIN, imm, INT16_MAX))
            return 2
        return 0

    def new_encoding(self, f=sys.stdout):
        """Report when a hypothetical ``const/u16`` would fit; return bytes saved.

        The proposed opcode stores a 16 bit unsigned offset from 32768, so
        it covers 32768 through 32768 + UINT16_MAX inclusive.
        """
        lowest = INT16_MAX + 1
        highest = lowest + UINT16_MAX
        if lowest <= self.imm <= highest:
            f.write('"const" could be encoded as a new "const/u16" stores a ')
            f.write("16 bit unsigned offset from 32768 instead of a 16 bit ")
            f.write("signed value\n")
            return 2
        return 0

    def dump(self, f=sys.stdout, context=None):
        """Write the register and literal (decimal and hex) to ``f``."""
        f.write(
            "%s v%u, #int %i // #%#x" % (self.get_name(), self.reg, self.imm, self.imm)
        )

    def emulate(self, emulator):
        """Store the literal into the destination register of ``emulator``."""
        emulator.write_register(self.reg, self.imm)
class Opcode15(Opcode):
    """Decoder for ``const/high16`` (opcode 0x15), two code units."""

    ops = {0x15: "const/high16"}
    num_code_units = 2
    max_regs = 1
    extra_data = "h"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # The second code unit supplies bits 16..31 of the literal.
        high_bits = inst[1]
        self.imm = high_bits << 16

    def dump(self, f=sys.stdout, context=None):
        """Write the register and literal (decimal and hex) to ``f``."""
        fields = (self.get_name(), self.reg, self.imm, self.imm)
        f.write("%s v%u, #int %i // #%#x" % fields)

    def emulate(self, emulator):
        """Store the literal into the destination register of ``emulator``."""
        emulator.write_register(self.reg, self.imm)
class Opcode16(Opcode):
    """Decoder for ``const-wide/16`` (opcode 0x16), two code units."""

    ops = {0x16: "const-wide/16"}
    num_code_units = 2
    max_regs = 1
    extra_data = "s"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # The literal is the second code unit, sign extended from 16 bits.
        raw_literal = inst[1]
        self.imm = sign_extending(raw_literal, 16)

    def dump(self, f=sys.stdout, context=None):
        """Write the register and literal (decimal and hex) to ``f``."""
        fields = (self.get_name(), self.reg, self.imm, self.imm)
        f.write("%s v%u, #int %i // #%#x" % fields)

    def emulate(self, emulator):
        """Store the literal into the destination register of ``emulator``."""
        emulator.write_register(self.reg, self.imm)
class Opcode17(Opcode):
    """Decoder for ``const-wide/32`` (opcode 0x17), three code units."""

    ops = {0x17: "const-wide/32"}
    num_code_units = 3
    max_regs = 1
    extra_data = "i"

    def __init__(self, inst, code_units):
        Opcode.__init__(self, inst)
        self.reg = inst.get_AA()
        # Signed 32 bit literal read starting at code unit 1.
        self.imm = inst.get_sint32(1)

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when ``const-wide/16`` would do; return bytes saved."""
        if INT16_MIN <= self.imm <= INT16_MAX:
            f.write('warning: "const-wide/32" can be encoded as a ')
            f.write(
                '"const-wide/16" more efficiently as (%i <= %i <= %i)\n'
                % (INT16_MIN, self.imm, INT16_MAX)
            )
            return 2
        return 0

    def new_encoding(self, f=sys.stdout):
        """Report when a hypothetical ``const-wide/u16`` fits; return bytes saved.

        The proposed opcode stores a 16 bit unsigned offset from 32768, so
        it covers 32768 through 32768 + UINT16_MAX inclusive.
        """
        lowest = INT16_MAX + 1
        highest = lowest + UINT16_MAX
        if lowest <= self.imm <= highest:
            f.write('"const-wide/32" could be encoded as a new ')
            f.write('"const-wide/u16" stores a 16 bit unsigned offset from ')
            f.write("32768 instead of a 16 bit signed value\n")
            return 2
        return 0

    def dump(self, f=sys.stdout, context=None):
        """Write the register and literal (decimal and hex) to ``f``."""
        f.write(
            "%s v%u, #int %i // #%#x" % (self.get_name(), self.reg, self.imm, self.imm)
        )

    def emulate(self, emulator):
        """Store the literal into the destination register of ``emulator``."""
        emulator.write_register(self.reg, self.imm)
class Opcode18(Opcode):
    """Decoder for ``const-wide/64`` (opcode 0x18), five code units."""

    ops = {0x18: "const-wide/64"}
    num_code_units = 5
    max_regs = 1
    extra_data = "l"

    def __init__(self, inst, code_units):
        Opcode.__init__(self, inst)
        self.reg = inst.get_AA()
        # Stored exactly as decoded (unsigned 64 bit); _signed_imm() gives
        # the signed interpretation needed for range checks.
        self.imm = inst.get_uint64(1)

    def _signed_imm(self):
        """Return ``self.imm`` reinterpreted as a signed 64 bit integer."""
        if self.imm >= (1 << 63):
            return self.imm - (1 << 64)
        return self.imm

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when a shorter const-wide form fits; return bytes saved.

        The comparisons use the signed value so that negative literals
        (stored here as large unsigned numbers) are recognised.
        """
        imm = self._signed_imm()
        if INT16_MIN <= imm <= INT16_MAX:
            f.write('warning: "const-wide/64" can be encoded as a ')
            f.write(
                '"const-wide/16" more efficiently as (%i <= %i <= %i)\n'
                % (INT16_MIN, imm, INT16_MAX)
            )
            return 6
        if INT32_MIN <= imm <= INT32_MAX:
            f.write('warning: "const-wide/64" can be encoded as a ')
            f.write(
                '"const-wide/32" more efficiently as (%i <= %i <= %i)\n'
                % (INT32_MIN, imm, INT32_MAX)
            )
            return 4
        return 0

    def dump(self, f=sys.stdout, context=None):
        """Write the register and literal (decimal and hex) to ``f``."""
        f.write(
            "%s v%u, #int %i // #%#x" % (self.get_name(), self.reg, self.imm, self.imm)
        )

    def emulate(self, emulator):
        """Store the literal into the destination register of ``emulator``."""
        emulator.write_register(self.reg, self.imm)
class Opcode19(Opcode):
    """Decoder for ``const-wide/high16`` (opcode 0x19), two code units."""

    ops = {0x19: "const-wide/high16"}
    num_code_units = 2
    max_regs = 1
    extra_data = "h"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # Sign extend the second code unit, then move it to bits 48..63.
        signed_high = sign_extending(inst[1], 16)
        self.imm = signed_high << 48

    def dump(self, f=sys.stdout, context=None):
        """Write the register and literal (decimal and hex) to ``f``."""
        fields = (self.get_name(), self.reg, self.imm, self.imm)
        f.write("%s v%u, #int %i // #%#x" % fields)

    def emulate(self, emulator):
        """Store the literal into the destination register of ``emulator``."""
        emulator.write_register(self.reg, self.imm)
class Opcode1A(Opcode):
    """Decoder for ``const-string`` (opcode 0x1A), two code units."""

    ops = {0x1A: "const-string"}
    num_code_units = 2
    max_regs = 1
    extra_data = "c"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # The string index is the whole second code unit.
        self.string_idx = inst[1]

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, its ``get_string()`` supplies the quoted
        string text printed ahead of the raw ``string@`` index.
        """
        head = "%s v%u, " % (self.get_name(), self.reg)
        f.write(head)
        if context is not None:
            text = context.get_string(self.string_idx)
            f.write('"%s" // ' % text)
        f.write("string@%4.4x" % self.string_idx)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode1B(Opcode):
    """Decoder for ``const-string/jumbo`` (opcode 0x1B), three code units."""

    ops = {0x1B: "const-string/jumbo"}
    num_code_units = 3
    max_regs = 1
    extra_data = "c"

    def __init__(self, inst, code_units):
        Opcode.__init__(self, inst)
        self.reg = inst.get_AA()
        # 32 bit string index read starting at code unit 1.
        self.string_idx = inst.get_uint32(1)

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, its ``get_string()`` supplies the quoted
        string text printed ahead of the raw ``string@`` index.
        """
        f.write("%s v%u, " % (self.get_name(), self.reg))
        if context is not None:
            f.write('"%s" // ' % context.get_string(self.string_idx))
        f.write("string@%8.8x" % self.string_idx)

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when ``const-string`` would do; return bytes saved.

        The short form is usable when the string index fits in 16 bits.
        This class has no ``signed_offset`` attribute, so the string index
        is the value tested.
        """
        if self.string_idx <= UINT16_MAX:
            f.write('warning: "const-string/jumbo" can be encoded as a ')
            f.write('"const-string" more efficiently as its offset is ')
            f.write("<= UINT16_MAX\n")
            return 2
        return 0

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode1C(Opcode):
    """Decoder for ``const-class`` (opcode 0x1C), two code units."""

    ops = {0x1C: "const-class"}
    num_code_units = 2
    max_regs = 1
    extra_data = "c"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # The type index is the whole second code unit.
        self.type = inst[1]

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, its ``get_typename()`` supplies the type
        name printed ahead of the raw ``type@`` index.
        """
        head = "%s v%u, " % (self.get_name(), self.reg)
        f.write(head)
        if context is not None:
            type_name = context.get_typename(self.type)
            f.write("%s // " % type_name)
        f.write("type@%4.4x" % self.type)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode1D(Opcode):
    """Decoder for ``monitor-enter`` (opcode 0x1D), a single code unit."""

    ops = {0x1D: "monitor-enter"}
    num_code_units = 1
    max_regs = 1
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # The single register is taken from the AA field.
        self.reg = inst.get_AA()

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vAA`` to ``f``; ``context`` is unused."""
        text = "%s v%u" % (self.get_name(), self.reg)
        f.write(text)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode1E(Opcode):
    """Decoder for ``monitor-exit`` (opcode 0x1E), a single code unit."""

    ops = {0x1E: "monitor-exit"}
    num_code_units = 1
    max_regs = 1
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # The single register is taken from the AA field.
        self.reg = inst.get_AA()

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vAA`` to ``f``; ``context`` is unused."""
        text = "%s v%u" % (self.get_name(), self.reg)
        f.write(text)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode1F(Opcode):
    """Decoder for ``check-cast`` (opcode 0x1F), two code units."""

    ops = {0x1F: "check-cast"}
    num_code_units = 2
    max_regs = 1
    extra_data = "c"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # The type index is the whole second code unit.
        self.type = inst[1]

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, its ``get_typename()`` supplies the type
        name printed ahead of the raw ``type@`` index.
        """
        head = "%s v%u, " % (self.get_name(), self.reg)
        f.write(head)
        if context is not None:
            type_name = context.get_typename(self.type)
            f.write("%s // " % type_name)
        f.write("type@%4.4x" % self.type)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode20(Opcode):
    """Decoder for ``instance-of`` (opcode 0x20), two code units."""

    ops = {0x20: "instance-of"}
    num_code_units = 2
    max_regs = 2
    extra_data = "c"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # Registers from the A and B fields; type index from unit 1.
        self.regs = [inst.get_A(), inst.get_B()]
        self.type = inst[1]

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, its ``get_typename()`` supplies the type
        name printed ahead of the raw ``type@`` index.
        """
        head = "%s v%u, v%u, " % (self.get_name(), self.regs[0], self.regs[1])
        f.write(head)
        if context is not None:
            type_name = context.get_typename(self.type)
            f.write("%s // " % type_name)
        f.write("type@%4.4x" % self.type)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode21(Opcode):
    """Decoder for ``array-length`` (opcode 0x21), a single code unit."""

    ops = {0x21: "array-length"}
    num_code_units = 1
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # Both registers come from the first code unit (A and B fields).
        self.regs = [inst.get_A(), inst.get_B()]

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vA, vB`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        f.write("%s v%u, v%u" % (name, self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode22(Opcode):
    """Decoder for ``new-instance`` (opcode 0x22), two code units."""

    ops = {0x22: "new-instance"}
    num_code_units = 2
    max_regs = 1
    extra_data = "c"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # The type index is the whole second code unit.
        self.type = inst[1]

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, its ``get_typename()`` supplies the type
        name printed ahead of the raw ``type@`` index.
        """
        head = "%s v%u, " % (self.get_name(), self.reg)
        f.write(head)
        if context is not None:
            type_name = context.get_typename(self.type)
            f.write("%s // " % type_name)
        f.write("type@%4.4x" % self.type)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode23(Opcode):
    """Decoder for ``new-array`` (opcode 0x23), two code units."""

    ops = {0x23: "new-array"}
    num_code_units = 2
    max_regs = 2
    extra_data = "c"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # Registers from the A and B fields; type index from unit 1.
        self.regs = [inst.get_A(), inst.get_B()]
        self.type = inst[1]

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, its ``get_typename()`` supplies the type
        name printed ahead of the raw ``type@`` index.
        """
        head = "%s v%u, v%u " % (self.get_name(), self.regs[0], self.regs[1])
        f.write(head)
        if context is not None:
            type_name = context.get_typename(self.type)
            f.write("%s // " % type_name)
        f.write("type@%4.4x" % self.type)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode24(Opcode):
    """Decoder for ``filled-new-array`` (opcode 0x24), three code units."""

    ops = {0x24: "filled-new-array"}
    num_code_units = 3
    max_regs = 5
    extra_data = "c"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        first_unit = inst[0]
        # Top nibble of the first unit is the argument count.
        arg_count = first_unit >> 12
        self.type = inst[1]
        # Pack the register nibbles: the third unit holds the low 16 bits
        # and bits 8..11 of the first unit become bits 16..19.
        packed = inst[2] | ((first_unit << 8) & 0xF0000)
        self.regs = [(packed >> (4 * slot)) & 0xF for slot in range(arg_count)]

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, its ``get_typename()`` supplies the type
        name printed ahead of the raw ``type@`` index.
        """
        name = self.get_name()
        reg_list = ", ".join("v%u" % reg for reg in self.regs)
        f.write("%s {%s} " % (name, reg_list))
        if context is not None:
            type_name = context.get_typename(self.type)
            f.write("%s // " % type_name)
        f.write("type@%4.4x" % self.type)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode25(Opcode):
    """Decoder for ``filled-new-array/range`` (opcode 0x25), three code units."""

    # The mnemonic carries no trailing space; dump() adds its own separator.
    ops = {0x25: "filled-new-array/range"}
    num_code_units = 3
    max_regs = "r"
    extra_data = "c"
    format = "3rc"

    def __init__(self, inst, code_units):
        Opcode.__init__(self, inst)
        arg_count = inst.get_AA()
        self.type = inst[1]
        first_reg = inst[2]
        # The range is arg_count consecutive registers starting at first_reg.
        self.regs = [first_reg + i for i in range(arg_count)]

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, its ``get_typename()`` supplies the type
        name printed ahead of the raw ``type@`` index.
        """
        f.write(
            "%s {%s} "
            % (self.get_name(), ", ".join(["v%u" % reg for reg in self.regs]))
        )
        if context is not None:
            f.write("%s // " % context.get_typename(self.type))
        f.write("type@%4.4x" % self.type)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode26(Opcode):
    """Decoder for ``fill-array-data`` (opcode 0x26), three code units."""

    ops = {0x26: "fill-array-data"}
    num_code_units = 3
    max_regs = 1
    extra_data = "t"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # Signed 32 bit offset read starting at code unit 1.
        self.signed_offset = inst.get_sint32(1)

    def dump(self, f=sys.stdout, context=None):
        """Write the register, absolute target and signed offset to ``f``."""
        name = self.get_name()
        target = self.inst.code_unit_idx + self.signed_offset
        offset_text = get_signed_hex_offset_as_str(self.signed_offset, 8)
        f.write("%s v%u, %8.8x // %s" % (name, self.reg, target, offset_text))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode27(Opcode):
    """Decoder for ``throw`` (opcode 0x27), a single code unit."""

    ops = {0x27: "throw"}
    num_code_units = 1
    max_regs = 1
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # The single register is taken from the AA field.
        self.reg = inst.get_AA()

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vAA`` to ``f``; ``context`` is unused."""
        text = "%s v%u" % (self.get_name(), self.reg)
        f.write(text)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode28(Opcode):
    """Decoder for ``goto`` (opcode 0x28), a single code unit."""

    ops = {0x28: "goto"}
    num_code_units = 1
    max_regs = 0
    extra_data = "t"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # The branch offset is the signed AA field.
        self.signed_offset = inst.get_signed_AA()

    def check_encoding(self, f=sys.stdout):
        """Report a zero branch offset as an error on ``f``; always returns 0."""
        offset_is_zero = self.signed_offset == 0
        if offset_is_zero:
            f.write('error: "goto" has a zero offset (invalid encoding)\n')
        return 0

    def dump(self, f=sys.stdout, context=None):
        """Write the absolute target and signed offset to ``f``."""
        name = self.get_name()
        target = self.inst.code_unit_idx + self.signed_offset
        f.write("%s %4.4x // %+i" % (name, target, self.signed_offset))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode29(Opcode):
    """Decoder for ``goto/16`` (opcode 0x29), two code units."""

    ops = {0x29: "goto/16"}
    num_code_units = 2
    max_regs = 0
    extra_data = "t"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # The branch offset is the second code unit, sign extended.
        raw_offset = inst[1]
        self.signed_offset = sign_extending(raw_offset, 16)

    def dump(self, f=sys.stdout, context=None):
        """Write the absolute target and signed offset to ``f``."""
        name = self.get_name()
        target = self.inst.code_unit_idx + self.signed_offset
        f.write("%s %4.4x // %+i" % (name, target, self.signed_offset))

    def check_encoding(self, f=sys.stdout):
        """Flag a zero offset, or warn when ``goto`` would do; return bytes saved."""
        offset = self.signed_offset
        if offset == 0:
            f.write('error: "goto/16" has a zero offset (invalid encoding)\n')
            return 0
        if INT8_MIN <= offset <= INT8_MAX:
            f.write('warning: "goto/16" can be encoded as a "goto" more ')
            f.write("efficiently since (INT8_MIN <= offset <= INT8_MAX)\n")
            return 2
        return 0

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode2A(Opcode):
    """Decoder for ``goto/32`` (opcode 0x2A), three code units."""

    ops = {0x2A: "goto/32"}
    num_code_units = 3
    max_regs = 0
    extra_data = "t"

    def __init__(self, inst, code_units):
        Opcode.__init__(self, inst)
        # Signed 32 bit offset read starting at code unit 1.
        self.signed_offset = inst.get_sint32(1)

    def dump(self, f=sys.stdout, context=None):
        """Write the absolute target and signed offset to ``f``."""
        f.write(
            "%s %4.4x // %+i"
            % (
                self.get_name(),
                self.inst.code_unit_idx + self.signed_offset,
                self.signed_offset,
            )
        )

    def check_encoding(self, f=sys.stdout):
        """Warn on ``f`` when a shorter goto form fits; return bytes saved.

        ``goto/32`` is three code units, so replacing it with the one unit
        ``goto`` saves 4 bytes and with the two unit ``goto/16`` saves 2.
        """
        if self.signed_offset == 0:
            # The shorter goto forms report a zero offset as an invalid
            # encoding, so there is nothing to save here.
            return 0
        if INT8_MIN <= self.signed_offset <= INT8_MAX:
            f.write('warning: "goto/32" can be encoded as a "goto" more ')
            f.write("efficiently since (INT8_MIN <= offset <= INT8_MAX)\n")
            return 4
        if INT16_MIN <= self.signed_offset <= INT16_MAX:
            f.write('warning: "goto/32" can be encoded as a "goto/16" more ')
            f.write("efficiently since (INT16_MIN <= offset <= INT16_MAX)\n")
            return 2
        return 0

    def new_encoding(self, f=sys.stdout):
        """Report when a hypothetical 24 bit goto would fit; return bytes saved."""
        if INT16_MIN <= self.signed_offset <= INT16_MAX:
            # Already coverable by an existing shorter form.
            return 0
        if INT24_MIN <= self.signed_offset <= INT24_MAX:
            f.write('"goto/32" could be encoded as a new "goto/16" where ')
            f.write("that opcode uses the extra 8 bits in the first code ")
            f.write("unit to provide a 24 bit branch range\n")
            return 2
        return 0

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode2B(Opcode):
    """Decoder for ``packed-switch`` (opcode 0x2B), three code units."""

    ops = {0x2B: "packed-switch"}
    num_code_units = 3
    max_regs = 1
    extra_data = "t"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # Signed 32 bit branch offset read starting at code unit 1.
        self.branch = inst.get_sint32(1)

    def dump(self, f=sys.stdout, context=None):
        """Write the register, absolute target and branch offset to ``f``."""
        name = self.get_name()
        target = self.inst.get_code_unit_index() + self.branch
        f.write("%s v%u, %8.8x // +%8.8x" % (name, self.reg, target, self.branch))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode2C(Opcode):
    """Decoder for ``sparse-switch`` (opcode 0x2C), three code units."""

    ops = {0x2C: "sparse-switch"}
    num_code_units = 3
    max_regs = 1
    extra_data = "t"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # Signed 32 bit branch offset read starting at code unit 1.
        self.branch = inst.get_sint32(1)

    def dump(self, f=sys.stdout, context=None):
        """Write the register, absolute target and branch offset to ``f``."""
        name = self.get_name()
        target = self.inst.get_code_unit_index() + self.branch
        f.write("%s v%u, %8.8x // +%8.8x" % (name, self.reg, target, self.branch))

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode2D_31(Opcode):
    """Decoder for the compare opcodes 0x2D-0x31, two code units each."""

    ops = {
        0x2D: "cmpl-float (lt bias)",
        0x2E: "cmpg-float (gt bias)",
        0x2F: "cmpl-double (lt bias)",
        0x30: "cmpg-double (gt bias)",
        0x31: "cmp-long",
    }
    num_code_units = 2
    max_regs = 3
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # AA field, then the low and high bytes of the second code unit.
        self.regs = [inst.get_AA(), inst.get_uint8_lo(1), inst.get_uint8_hi(1)]

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vAA, vBB, vCC`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        fields = (name, self.regs[0], self.regs[1], self.regs[2])
        f.write("%s v%u, v%u, v%u" % fields)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode32_37(Opcode):
    """Decoder for the two register ``if-*`` opcodes 0x32-0x37."""

    ops = {
        0x32: "if-eq",
        0x33: "if-ne",
        0x34: "if-lt",
        0x35: "if-ge",
        0x36: "if-gt",
        0x37: "if-le",
    }
    num_code_units = 2
    max_regs = 2
    extra_data = "t"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.regs = [inst.get_A(), inst.get_B()]
        # The branch offset is the second code unit, sign extended.
        raw_offset = inst[1]
        self.signed_offset = sign_extending(raw_offset, 16)

    def dump(self, f=sys.stdout, context=None):
        """Write both registers, the absolute target and the offset to ``f``."""
        name = self.get_name()
        target = self.inst.code_unit_idx + self.signed_offset
        fields = (name, self.regs[0], self.regs[1], target, self.signed_offset)
        f.write("%s v%u, v%u, %4.4x // %i" % fields)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode38_3D(Opcode):
    """Decoder for the compare-with-zero ``if-*z`` opcodes 0x38-0x3D."""

    ops = {
        0x38: "if-eqz",
        0x39: "if-nez",
        0x3A: "if-ltz",
        0x3B: "if-gez",
        0x3C: "if-gtz",
        0x3D: "if-lez",
    }
    num_code_units = 2
    max_regs = 1
    # The second code unit is a branch offset, so this is tagged "t" like
    # the other branching opcodes rather than "c" (constant pool index).
    extra_data = "t"

    def __init__(self, inst, code_units):
        Opcode.__init__(self, inst)
        self.reg = inst.get_AA()
        # The branch offset is the second code unit, sign extended.
        self.signed_offset = sign_extending(inst[1], 16)

    def dump(self, f=sys.stdout, context=None):
        """Write the register, absolute target and signed offset to ``f``."""
        f.write(
            "%s v%u, %4.4x // %s"
            % (
                self.get_name(),
                self.reg,
                self.signed_offset + self.inst.code_unit_idx,
                get_signed_hex_offset_as_str(self.signed_offset, 4),
            )
        )

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode44_51(Opcode):
    """Decoder for the array ``aget*``/``aput*`` opcodes 0x44-0x51."""

    ops = {
        0x44: "aget",
        0x45: "aget-wide",
        0x46: "aget-object",
        0x47: "aget-boolean",
        0x48: "aget-byte",
        0x49: "aget-char",
        0x4A: "aget-short",
        0x4B: "aput",
        0x4C: "aput-wide",
        0x4D: "aput-object",
        0x4E: "aput-boolean",
        0x4F: "aput-byte",
        0x50: "aput-char",
        0x51: "aput-short",
    }
    num_code_units = 2
    max_regs = 3
    extra_data = "x"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # AA field, then the low and high bytes of the second code unit.
        self.regs = [inst.get_AA(), inst.get_uint8_lo(1), inst.get_uint8_hi(1)]

    def dump(self, f=sys.stdout, context=None):
        """Write ``<name> vAA, vBB, vCC`` to ``f``; ``context`` is unused."""
        name = self.get_name()
        fields = (name, self.regs[0], self.regs[1], self.regs[2])
        f.write("%s v%u, v%u, v%u" % fields)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode52_5f(Opcode):
    """Decoder for the instance field ``iget*``/``iput*`` opcodes 0x52-0x5F."""

    ops = {
        0x52: "iget",
        0x53: "iget-wide",
        0x54: "iget-object",
        0x55: "iget-boolean",
        0x56: "iget-byte",
        0x57: "iget-char",
        0x58: "iget-short",
        0x59: "iput",
        0x5A: "iput-wide",
        0x5B: "iput-object",
        0x5C: "iput-boolean",
        0x5D: "iput-byte",
        0x5E: "iput-char",
        0x5F: "iput-short",
    }
    num_code_units = 2
    max_regs = 2
    extra_data = "c"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        # Registers from the A and B fields; field index from unit 1.
        self.regs = [inst.get_A(), inst.get_B()]
        self.field_idx = inst[1]

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, the field's class, name and type are
        looked up through it and printed ahead of the raw ``field@`` index.
        """
        head = "%s v%u, v%u, " % (self.get_name(), self.regs[0], self.regs[1])
        f.write(head)
        if context is not None:
            field_item = context.get_field_ids()[self.field_idx]
            class_name = context.get_typename(field_item.class_idx)
            field_name = context.get_string(field_item.name_idx)
            type_name = context.get_typename(field_item.type_idx)
            f.write("%s.%s:%s // " % (class_name, field_name, type_name))
        f.write("field@%4.4x" % self.field_idx)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode60_6d(Opcode):
    """Decoder for the static field ``sget*``/``sput*`` opcodes 0x60-0x6D."""

    ops = {
        0x60: "sget",
        0x61: "sget-wide",
        0x62: "sget-object",
        0x63: "sget-boolean",
        0x64: "sget-byte",
        0x65: "sget-char",
        0x66: "sget-short",
        0x67: "sput",
        0x68: "sput-wide",
        0x69: "sput-object",
        0x6A: "sput-boolean",
        0x6B: "sput-byte",
        0x6C: "sput-char",
        0x6D: "sput-short",
    }
    num_code_units = 2
    max_regs = 1
    extra_data = "c"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        self.reg = inst.get_AA()
        # 16 bit field index read at code unit 1.
        self.field_idx = inst.get_uint16(1)

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, the field's class, name and type are
        looked up through it and printed ahead of the raw ``field@`` index.
        """
        head = "%s v%u, " % (self.get_name(), self.reg)
        f.write(head)
        if context is not None:
            field_item = context.get_field_ids()[self.field_idx]
            class_name = context.get_typename(field_item.class_idx)
            field_name = context.get_string(field_item.name_idx)
            type_name = context.get_typename(field_item.type_idx)
            f.write("%s.%s:%s // " % (class_name, field_name, type_name))
        f.write("field@%4.4x" % self.field_idx)

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
# Module-level tallies updated by Opcode6E_72.new_encoding(): the number of
# invoke instructions that could, and could not, use the proposed
# "/min-range" encoding.
can_use_new_encoding = 0
cant_use_new_encoding = 0
class Opcode6E_72(Opcode):
    """Decoder for the non-range ``invoke-*`` opcodes 0x6E-0x72."""

    ops = {
        0x6E: "invoke-virtual",
        0x6F: "invoke-super",
        0x70: "invoke-direct",
        0x71: "invoke-static",
        0x72: "invoke-interface",
    }
    num_code_units = 3
    max_regs = 5
    extra_data = "c"
    format = "35c"

    def __init__(self, inst, code_units):
        super().__init__(inst)
        first_unit = inst[0]
        # Top nibble of the first unit is the argument count.
        arg_count = first_unit >> 12
        self.method_idx = inst[1]
        # Pack the register nibbles: the third unit holds the low 16 bits
        # and bits 8..11 of the first unit become bits 16..19.
        packed = inst[2] | ((first_unit << 8) & 0xF0000)
        self.regs = [(packed >> (4 * slot)) & 0xF for slot in range(arg_count)]

    def dump(self, f=sys.stdout, context=None):
        """Write the instruction to ``f``.

        When ``context`` is given, the method's class, name and prototype
        are looked up through it and printed ahead of the ``method@`` index.
        """
        name = self.get_name()
        reg_list = ", ".join("v%u" % reg for reg in self.regs)
        f.write("%s {%s} " % (name, reg_list))
        if context is not None:
            method_item = context.get_method_ids()[self.method_idx]
            class_name = context.get_typename(method_item.class_idx)
            method_name = context.get_string(method_item.name_idx)
            proto = context.get_proto_string(method_item.proto_idx)
            f.write("%s.%s:%s // " % (class_name, method_name, proto))
        f.write("method@%4.4x" % self.method_idx)

    def new_encoding(self, f=sys.stdout):
        """Report when a hypothetical ``/min-range`` form fits; return bytes saved.

        Also bumps the module-level ``can_use_new_encoding`` or
        ``cant_use_new_encoding`` counter, depending on the outcome.
        """
        global can_use_new_encoding, cant_use_new_encoding
        regs = self.regs
        eligible = (
            self.regs_are_sequential()
            and (len(regs) == 0 or regs[0] <= UINT4_MAX)
            and len(regs) <= UINT4_MAX
        )
        if not eligible:
            cant_use_new_encoding += 1
            return 0
        can_use_new_encoding += 1
        name = self.get_name()
        f.write('"%s" can be encoded as "%s/min-range" ' % (name, name))
        f.write("where the first register is contained in the first ")
        f.write("opcode\n")
        return 2

    def emulate(self, emulator):
        """Emulation is not implemented; always raises ValueError."""
        raise ValueError("emulate not supported")
class Opcode74_78(Opcode):
    """Decoder for the ``invoke-*/range`` instructions (format 3rc)."""

    ops = {
        0x74: "invoke-virtual/range",
        0x75: "invoke-super/range",
        0x76: "invoke-direct/range",
        0x77: "invoke-static/range",
        0x78: "invoke-interface/range",
    }
    num_code_units = 3
    max_regs = "r"
    extra_data = "c"
    format = "3rc"

    def __init__(self, inst, code_units):
        """Extract the method index and expand the register range."""
        Opcode.__init__(self, inst)
        self.method_idx = inst[1]
        # AA is the register count; the third code unit is the first register.
        arg_count = inst.get_AA()
        first_reg = inst[2]
        self.regs = [first_reg + offset for offset in range(arg_count)]

    def dump(self, f=sys.stdout, context=None):
        """Write ``name {regs} [Class.method:proto // ]method@XXXX`` to ``f``."""
        reg_list = ", ".join("v%u" % reg for reg in self.regs)
        f.write("%s {%s} " % (self.get_name(), reg_list))
        if context is not None:
            method_item = context.get_method_ids()[self.method_idx]
            owner = context.get_typename(method_item.class_idx)
            method_name = context.get_string(method_item.name_idx)
            proto = context.get_proto_string(method_item.proto_idx)
            f.write("%s.%s:%s // " % (owner, method_name, proto))
        f.write("method@%4.4x" % self.method_idx)

    def new_encoding(self, f=sys.stdout):
        """Report whether a hypothetical ``/min-range`` form would fit.

        Returns the number of bytes that would be saved (2) or 0.
        """
        reg_count = len(self.regs)
        fits = (
            self.regs_are_sequential()
            and (reg_count == 0 or self.regs[0] <= UINT4_MAX)
            and reg_count <= UINT4_MAX
        )
        if not fits:
            return 0
        name = self.get_name()
        f.write('"%s" can be encoded as a "%s/min-range" ' % (name, name))
        f.write("where the first register is contained in the first ")
        f.write("opcode\n")
        return 2

    def emulate(self, emulator):
        """Emulation is not implemented for invoke instructions."""
        raise ValueError("emulate not supported")
class Opcode7B_8F(Opcode):
    """Decoder for the two-register negate / not / conversion instructions."""

    ops = {
        0x7B: "neg-int",
        0x7C: "not-int",
        0x7D: "neg-long",
        0x7E: "not-long",
        0x7F: "neg-float",
        0x80: "neg-double",
        0x81: "int-to-long",
        0x82: "int-to-float",
        0x83: "int-to-double",
        0x84: "long-to-int",
        0x85: "long-to-float",
        0x86: "long-to-double",
        0x87: "float-to-int",
        0x88: "float-to-long",
        0x89: "float-to-double",
        0x8A: "double-to-int",
        0x8B: "double-to-long",
        0x8C: "double-to-float",
        0x8D: "int-to-byte",
        0x8E: "int-to-char",
        0x8F: "int-to-short",
    }
    num_code_units = 1
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        """Read the two 4-bit register operands A and B."""
        Opcode.__init__(self, inst)
        self.regs = [inst.get_A(), inst.get_B()]

    def dump(self, f=sys.stdout, context=None):
        """Write ``name vA, vB`` to ``f``; ``context`` is unused."""
        first_reg, second_reg = self.regs[0], self.regs[1]
        f.write("%s v%u, v%u" % (self.get_name(), first_reg, second_reg))

    def emulate(self, emulator):
        """Emulation is not implemented for these instructions."""
        raise ValueError("emulate not supported")
class Opcode90_AF(Opcode):
    """Decoder for the three-register binary arithmetic instructions."""

    ops = {
        0x90: "add-int",
        0x91: "sub-int",
        0x92: "mul-int",
        0x93: "div-int",
        0x94: "rem-int",
        0x95: "and-int",
        0x96: "or-int",
        0x97: "xor-int",
        0x98: "shl-int",
        0x99: "shr-int",
        0x9A: "ushr-int",
        0x9B: "add-long",
        0x9C: "sub-long",
        0x9D: "mul-long",
        0x9E: "div-long",
        0x9F: "rem-long",
        0xA0: "and-long",
        0xA1: "or-long",
        0xA2: "xor-long",
        0xA3: "shl-long",
        0xA4: "shr-long",
        0xA5: "ushr-long",
        0xA6: "add-float",
        0xA7: "sub-float",
        0xA8: "mul-float",
        0xA9: "div-float",
        0xAA: "rem-float",
        0xAB: "add-double",
        0xAC: "sub-double",
        0xAD: "mul-double",
        0xAE: "div-double",
        0xAF: "rem-double",
    }
    num_code_units = 2
    max_regs = 3
    extra_data = "x"

    def __init__(self, inst, code_units):
        """Read the destination (AA) and the two source registers (BB, CC)."""
        Opcode.__init__(self, inst)
        self.regs = [
            inst.get_AA(),
            inst.get_uint8_lo(1),
            inst.get_uint8_hi(1),
        ]

    def dump(self, f=sys.stdout, context=None):
        """Write ``name vAA, vBB, vCC`` to ``f``; ``context`` is unused."""
        dest, lhs, rhs = self.regs[0], self.regs[1], self.regs[2]
        f.write("%s v%u, v%u, v%u" % (self.get_name(), dest, lhs, rhs))

    def opIsCommutative(self):
        """Return True if the operation is commutative"""
        commutative_ops = (
            0x90,  # add-int
            0x92,  # mul-int
            0x95,  # and-int
            0x96,  # or-int
            0x97,  # xor-int
            0x9B,  # add-long
            0x9D,  # mul-long
            0xA0,  # and-long
            0xA1,  # or-long
            0xA2,  # xor-long
            0xA6,  # add-float
            0xA8,  # mul-float
            0xAB,  # add-double
            0xAD,  # mul-double
        )
        return self.get_op() in commutative_ops

    def check_encoding(self, f=sys.stdout):
        """Warn when the shorter ``/2addr`` form could have been used.

        Returns the number of bytes that would be saved (2), or 0 when the
        instruction cannot be shortened.
        """
        vAA, vBB, vCC = self.regs[0], self.regs[1], self.regs[2]
        # Destination doubles as the first source: direct /2addr candidate.
        if vAA == vBB and vAA <= UINT4_MAX and vCC <= UINT4_MAX:
            name = self.get_name()
            f.write('warning: "%s" can be encoded more efficiently ' % (name))
            f.write('as "%s/2addr v%u, v%u"\n' % (name, vAA, vCC))
            return 2
        # Destination doubles as the second source: only valid if the
        # operands may be swapped.
        swappable = (
            vAA == vCC
            and vAA <= UINT4_MAX
            and vBB <= UINT4_MAX
            and self.opIsCommutative()
        )
        if swappable:
            name = self.get_name()
            f.write('warning: "%s" is commutative and can be ' % (name))
            f.write(
                'encoded more efficiently as "%s/2addr v%u, v%u"\n' % (name, vAA, vBB)
            )
            return 2
        return 0  # Return zero to indicate we can't save any bytes

    def emulate(self, emulator):
        """Emulation is not implemented for these instructions."""
        raise ValueError("emulate not supported")
class OpcodeB0_CF(Opcode):
    """Decoder for the two-register ``/2addr`` binary arithmetic instructions.

    The first register is both the destination and the first source operand.
    """

    ops = {
        0xB0: "add-int/2addr",
        0xB1: "sub-int/2addr",
        0xB2: "mul-int/2addr",
        0xB3: "div-int/2addr",
        0xB4: "rem-int/2addr",
        0xB5: "and-int/2addr",
        0xB6: "or-int/2addr",
        0xB7: "xor-int/2addr",
        0xB8: "shl-int/2addr",
        0xB9: "shr-int/2addr",
        0xBA: "ushr-int/2addr",
        0xBB: "add-long/2addr",
        0xBC: "sub-long/2addr",
        0xBD: "mul-long/2addr",
        0xBE: "div-long/2addr",
        0xBF: "rem-long/2addr",
        0xC0: "and-long/2addr",
        0xC1: "or-long/2addr",
        0xC2: "xor-long/2addr",
        0xC3: "shl-long/2addr",
        0xC4: "shr-long/2addr",
        0xC5: "ushr-long/2addr",
        0xC6: "add-float/2addr",
        0xC7: "sub-float/2addr",
        0xC8: "mul-float/2addr",
        0xC9: "div-float/2addr",
        0xCA: "rem-float/2addr",
        0xCB: "add-double/2addr",
        0xCC: "sub-double/2addr",
        0xCD: "mul-double/2addr",
        0xCE: "div-double/2addr",
        # The name previously carried a stray trailing space, which leaked
        # into disassembly output and into the per-opcode statistics keys.
        0xCF: "rem-double/2addr",
    }
    num_code_units = 1
    max_regs = 2
    extra_data = "x"

    def __init__(self, inst, code_units):
        """Read the two 4-bit register operands A and B."""
        Opcode.__init__(self, inst)
        self.regs = []
        self.regs.append(inst.get_A())
        self.regs.append(inst.get_B())

    def dump(self, f=sys.stdout, context=None):
        """Write ``name vA, vB`` to ``f``; ``context`` is unused."""
        f.write("%s v%u, v%u" % (self.get_name(), self.regs[0], self.regs[1]))

    def emulate(self, emulator):
        """Emulation is not implemented for these instructions."""
        raise ValueError("emulate not supported")
class OpcodeD0_D7(Opcode):
    """Decoder for ``binop/lit16 vA, vB, #+CCCC`` (32-bit int with a 16-bit literal)."""

    ops = {
        0xD0: "add-int/lit16",
        0xD1: "rsub-int/lit16",
        0xD2: "mul-int/lit16",
        0xD3: "div-int/lit16",
        0xD4: "rem-int/lit16",
        0xD5: "and-int/lit16",
        0xD6: "or-int/lit16",
        0xD7: "xor-int/lit16",
    }
    num_code_units = 2
    max_regs = 2
    extra_data = "s"

    def __init__(self, inst, code_units):
        """Read destination A, source B and the sign-extended 16-bit literal."""
        Opcode.__init__(self, inst)
        self.regs = []
        self.regs.append(inst.get_A())
        self.regs.append(inst.get_B())
        self.imm = sign_extending(inst[1], 16)

    def dump(self, f=sys.stdout, context=None):
        """Write ``name vA, vB, #int <imm> // #<hex>`` to ``f``."""
        f.write(
            "%s v%u, v%u, #int %i // #%#x"
            % (self.get_name(), self.regs[0], self.regs[1], self.imm, self.imm)
        )

    @staticmethod
    def _to_int32(value):
        """Wrap ``value`` into the signed 32-bit range (two's complement)."""
        return ((value + 0x80000000) & 0xFFFFFFFF) - 0x80000000

    def emulate(self, emulator):
        """Compute ``vA = vB <op> literal`` with 32-bit wrap-around.

        The previous implementation referenced a non-existent ``self.reg``
        and stored the literal without applying the operation.

        Raises:
            ValueError: if the source register was never written (raised by
                ``emulator.read_register``).
            ZeroDivisionError: for div/rem with a zero literal.
        """
        src = self._to_int32(emulator.read_register(self.regs[1]))
        lit = self.imm
        op = self.get_op()
        if op == 0xD0:
            result = src + lit
        elif op == 0xD1:
            # rsub is "reverse subtract": literal minus register.
            result = lit - src
        elif op == 0xD2:
            result = src * lit
        elif op in (0xD3, 0xD4):
            if lit == 0:
                raise ZeroDivisionError("%s: divide by zero" % self.get_name())
            # Integer division truncates toward zero, unlike Python's
            # floor division, so divide magnitudes and restore the sign.
            quotient = abs(src) // abs(lit)
            if (src < 0) != (lit < 0):
                quotient = -quotient
            result = quotient if op == 0xD3 else src - quotient * lit
        elif op == 0xD5:
            result = src & lit
        elif op == 0xD6:
            result = src | lit
        else:
            result = src ^ lit
        emulator.write_register(self.regs[0], self._to_int32(result))
class OpcodeD8_E2(Opcode):
    """Decoder for ``binop/lit8 vAA, vBB, #+CC`` (32-bit int with an 8-bit literal)."""

    ops = {
        0xD8: "add-int/lit8",
        0xD9: "rsub-int/lit8",
        0xDA: "mul-int/lit8",
        0xDB: "div-int/lit8",
        0xDC: "rem-int/lit8",
        0xDD: "and-int/lit8",
        0xDE: "or-int/lit8",
        0xDF: "xor-int/lit8",
        0xE0: "shl-int/lit8",
        0xE1: "shr-int/lit8",
        0xE2: "ushr-int/lit8",
    }
    num_code_units = 2
    max_regs = 2
    extra_data = "b"

    def __init__(self, inst, code_units):
        """Read destination AA, source BB and the sign-extended 8-bit literal."""
        Opcode.__init__(self, inst)
        self.regs = []
        self.regs.append(inst.get_AA())
        self.regs.append(inst.get_uint8_lo(1))
        self.imm = sign_extending(inst.get_uint8_hi(1), 8)

    def dump(self, f=sys.stdout, context=None):
        """Write ``name vAA, vBB, #int <imm> // #<hex>`` to ``f``."""
        f.write(
            "%s v%u, v%u, #int %i // #%#x"
            % (self.get_name(), self.regs[0], self.regs[1], self.imm, self.imm)
        )

    @staticmethod
    def _to_int32(value):
        """Wrap ``value`` into the signed 32-bit range (two's complement)."""
        return ((value + 0x80000000) & 0xFFFFFFFF) - 0x80000000

    def emulate(self, emulator):
        """Compute ``vAA = vBB <op> literal`` with 32-bit wrap-around.

        The previous implementation referenced a non-existent ``self.reg``
        and stored the literal without applying the operation.

        Raises:
            ValueError: if the source register was never written (raised by
                ``emulator.read_register``).
            ZeroDivisionError: for div/rem with a zero literal.
        """
        src = self._to_int32(emulator.read_register(self.regs[1]))
        lit = self.imm
        op = self.get_op()
        if op == 0xD8:
            result = src + lit
        elif op == 0xD9:
            # rsub is "reverse subtract": literal minus register.
            result = lit - src
        elif op == 0xDA:
            result = src * lit
        elif op in (0xDB, 0xDC):
            if lit == 0:
                raise ZeroDivisionError("%s: divide by zero" % self.get_name())
            # Integer division truncates toward zero, unlike Python's
            # floor division, so divide magnitudes and restore the sign.
            quotient = abs(src) // abs(lit)
            if (src < 0) != (lit < 0):
                quotient = -quotient
            result = quotient if op == 0xDB else src - quotient * lit
        elif op == 0xDD:
            result = src & lit
        elif op == 0xDE:
            result = src | lit
        elif op == 0xDF:
            result = src ^ lit
        else:
            # Only the low five bits of the shift distance are used for
            # 32-bit shifts.
            distance = lit & 0x1F
            if op == 0xE0:
                result = src << distance
            elif op == 0xE1:
                result = src >> distance  # arithmetic (sign-propagating)
            else:
                result = (src & 0xFFFFFFFF) >> distance  # logical
        emulator.write_register(self.regs[0], self._to_int32(result))
class OpcodeFA(Opcode):
    """Decoder for ``invoke-polymorphic`` (format 45cc).

    Code unit layout: ``A|G|op BBBB F|E|D|C HHHH`` where BBBB is the method
    reference index, C..G are the argument registers, A is the argument
    count and HHHH is the prototype reference index.

    The previous constructor unconditionally raised, so any file containing
    this opcode could not be decoded; the unreachable code after the raise
    also read a fifth code unit that a four-unit instruction does not have,
    and ``dump`` referenced an attribute that was never set.
    """

    ops = {0xFA: "invoke-polymorphic"}
    num_code_units = 4
    max_regs = 5
    extra_data = "cc"

    def __init__(self, inst, code_units):
        """Extract the method index, argument registers and proto index."""
        Opcode.__init__(self, inst)
        arg_count = inst[0] >> 12
        self.method_ref_idx = inst[1]
        # Third code unit carries registers C..F (one per nibble); register
        # G lives in bits 8-11 of the first code unit.
        regs = inst[2] | ((inst[0] << 8) & 0xF0000)
        self.proto = inst[3]
        self.regs = []
        for _ in range(arg_count):
            self.regs.append(regs & 0xF)
            regs >>= 4

    def dump(self, f=sys.stdout, context=None):
        """Write ``name {regs} [Class.method:proto, callproto // ]method@X, proto@Y``."""
        f.write(
            "%s {%s} "
            % (self.get_name(), ", ".join(["v%u" % reg for reg in self.regs]))
        )
        if context is not None:
            method_item = context.get_method_ids()[self.method_ref_idx]
            f.write(
                "%s.%s:%s, %s // "
                % (
                    context.get_typename(method_item.class_idx),
                    context.get_string(method_item.name_idx),
                    context.get_proto_string(method_item.proto_idx),
                    context.get_proto_string(self.proto),
                )
            )
        f.write("method@%4.4x, proto@%4.4x" % (self.method_ref_idx, self.proto))

    def emulate(self, emulator):
        """Emulation is not implemented for invoke instructions."""
        raise ValueError("emulate not supported")
class CodeUnits(Opcode):
    """Cursor over a sequence of code units, consumed front to back."""

    def __init__(self, code_units):
        self.idx = 0
        self.code_units = code_units

    def index_is_valid(self):
        """Return True while the cursor still points inside the sequence."""
        return len(self.code_units) > self.idx

    def get_index(self):
        """Return the current cursor position."""
        return self.idx

    def peek_code_unit(self, idx):
        """Return the code unit at ``idx`` without moving the cursor."""
        return self.code_units[idx]

    def get_int(self):
        """Consume two code units and return them as a signed 32-bit value."""
        return sign_extending(self.get_uint(), 32)

    def get_uint(self):
        """Consume two code units (low half first) as one unsigned value."""
        low = self.get_code_unit()
        high = self.get_code_unit()
        return low | (high << 16)

    def get_code_unit(self):
        """Return the code unit under the cursor and advance past it."""
        current = self.idx
        # The cursor moves before the lookup, so it advances even when the
        # lookup raises IndexError.
        self.idx = current + 1
        return self.code_units[current]
def swap16(u):
    """Return ``u`` with its two low-order bytes exchanged (bits above 15 dropped)."""
    high_byte = (u >> 8) & 0xFF
    low_byte = u & 0xFF
    return (low_byte << 8) | high_byte
class DexInstruction(object):
    """One decoded instruction together with the raw code units it spans.

    ``decode()`` pulls code units from a ``CodeUnits`` cursor, looks up the
    opcode class in ``opcode_defs`` and stores the resulting opcode object in
    ``self.instruction``. The ``get_*`` helpers expose the bit fields of the
    raw code units to the opcode classes.
    """

    # Dispatch table indexed by the 8-bit opcode; populated by initialize().
    opcode_defs = []

    @classmethod
    def initialize(cls):
        """Build the opcode -> decoder class table.

        The table is rebuilt in place, so calling this more than once is
        harmless.

        Raises:
            ValueError: if two opcode classes claim the same opcode value.
        """
        opcode_classes = [
            Opcode00,
            Opcode01,
            Opcode02,
            Opcode03,
            Opcode04,
            Opcode05,
            Opcode06,
            Opcode07,
            Opcode08,
            Opcode09,
            Opcode0A_0D,
            Opcode0E,
            Opcode0F,
            Opcode10,
            Opcode11,
            Opcode12,
            Opcode13,
            Opcode14,
            Opcode15,
            Opcode16,
            Opcode17,
            Opcode18,
            Opcode19,
            Opcode1A,
            Opcode1B,
            Opcode1C,
            Opcode1D,
            Opcode1E,
            Opcode1F,
            Opcode20,
            Opcode21,
            Opcode22,
            Opcode23,
            Opcode24,
            Opcode25,
            Opcode26,
            Opcode27,
            Opcode28,
            Opcode29,
            Opcode2A,
            Opcode2B,
            Opcode2C,
            Opcode2D_31,
            Opcode32_37,
            Opcode38_3D,
            Opcode44_51,
            Opcode52_5f,
            Opcode60_6d,
            Opcode6E_72,
            Opcode74_78,
            Opcode7B_8F,
            Opcode90_AF,
            OpcodeB0_CF,
            OpcodeD0_D7,
            OpcodeD8_E2,
            OpcodeFA,
        ]
        # Slice-assign so the shared list object is reset rather than grown;
        # appending on a repeat call would leave stale entries behind and
        # trip the duplicate check below.
        cls.opcode_defs[:] = [None] * 256
        for opcode_class in opcode_classes:
            for op in opcode_class.ops:
                if cls.opcode_defs[op] is None:
                    cls.opcode_defs[op] = opcode_class
                else:
                    raise ValueError(
                        "registering the same opcode twice: "
                        "%#2.2x in %s" % (op, str(opcode_class))
                    )

    def dump(self, f=sys.stdout, suffix="\n", context=None):
        """Write the code-unit offset, raw code units and disassembly to ``f``."""
        f.write("%4.4x:" % (self.code_unit_idx))
        for code_unit in self.code_units:
            f.write(" %4.4x" % (swap16(code_unit)))
        num_code_units = len(self.code_units)
        if num_code_units < 5:
            # Each printed code unit occupies five characters (" %4.4x"), so
            # pad every missing one with the same width to keep the mnemonic
            # column aligned across instructions of different lengths.
            f.write(" " * 5 * (5 - num_code_units))
        f.write(" ")
        self.instruction.dump(f=f, context=context)
        if suffix:
            f.write(suffix)

    def __init__(self):
        self.code_unit_idx = -1
        self.code_units = None
        # Set by decode(); defined here so the attribute always exists.
        self.instruction = None

    def check_encoding(self, f=sys.stdout):
        """Run the opcode's encoding check; dump the instruction if it saves bytes."""
        bytes_saved = self.instruction.check_encoding(f)
        if bytes_saved:
            self.dump(f)
        return bytes_saved

    def new_encoding(self, f=sys.stdout):
        """Run the opcode's new-encoding check; dump the instruction if it saves bytes."""
        bytes_saved = self.instruction.new_encoding(f)
        if bytes_saved:
            self.dump(f)
        return bytes_saved

    def get_code_unit_index(self):
        """Return the code-unit offset at which this instruction was decoded."""
        return self.code_unit_idx

    def decode(self, code_units):
        """Consume one instruction from ``code_units`` and decode it.

        Raises:
            ValueError: if no decoder class is registered for the opcode.
        """
        self.code_unit_idx = code_units.get_index()
        self.code_units = []
        self.code_units.append(code_units.get_code_unit())
        op = self.get_op()
        opcode_class = self.opcode_defs[op]
        if opcode_class is None:
            raise ValueError("unsupported opcode %#4.4x" % (swap16(self[0])))
        # The first code unit is already consumed; fetch the remainder.
        for _ in range(1, opcode_class.num_code_units):
            self.code_units.append(code_units.get_code_unit())
        self.instruction = opcode_class(self, code_units)

    def get_name(self):
        """Return the mnemonic of the decoded instruction."""
        return self.instruction.get_name()

    def get_num_code_units(self):
        """Return the number of code units reported by the decoded opcode."""
        return self.instruction.get_num_code_units()

    def get_op(self):
        """Return the 1 byte op field that tells us what instruction this is"""
        return self.code_units[0] & 0xFF

    def get_A(self):
        """Get the 4 bit value of A"""
        return (self.code_units[0] >> 8) & 0xF

    def get_B(self):
        """Get the 4 bit value of B"""
        return (self.code_units[0] >> 12) & 0xF

    def get_AA(self):
        """Get the 8 bit value of AA from the byte next to the Op"""
        return self.get_uint8_hi(0)

    def get_signed_AA(self):
        """Return AA interpreted as a signed 8-bit value."""
        return sign_extending(self.get_AA(), 8)

    def get_uint8_lo(self, idx):
        """Return the low byte of code unit ``idx``."""
        return self.code_units[idx] & 0xFF

    def get_sint8_lo(self, idx):
        """Return the low byte of code unit ``idx`` as a signed value."""
        # ``idx`` must be forwarded; omitting it raised TypeError.
        return sign_extending(self.get_uint8_lo(idx), 8)

    def get_uint8_hi(self, idx):
        """Return the high byte of code unit ``idx``."""
        return (self.code_units[idx] >> 8) & 0xFF

    def get_sint8_hi(self, idx):
        """Return the high byte of code unit ``idx`` as a signed value."""
        # ``idx`` must be forwarded; omitting it raised TypeError.
        return sign_extending(self.get_uint8_hi(idx), 8)

    def get_uint16(self, idx):
        """Return code unit ``idx`` unchanged."""
        return self.code_units[idx]

    def get_sint16(self, idx):
        """Return code unit ``idx`` as a signed 16-bit value."""
        # ``idx`` must be forwarded; omitting it raised TypeError.
        return sign_extending(self.get_uint16(idx), 16)

    def get_uint32(self, idx):
        """Combine code units ``idx`` (low) and ``idx + 1`` (high)."""
        return self.code_units[idx + 1] << 16 | self.code_units[idx]

    def get_sint32(self, idx):
        """Signed variant of get_uint32()."""
        return sign_extending(self.get_uint32(idx), 32)

    def get_uint64(self, idx):
        """Combine four code units starting at ``idx``, lowest first."""
        return (
            self.code_units[idx + 3] << 48
            | self.code_units[idx + 2] << 32
            | self.code_units[idx + 1] << 16
            | self.code_units[idx]
        )

    def get_sint64(self, idx):
        """Signed variant of get_uint64()."""
        return sign_extending(self.get_uint64(idx), 64)

    def __len__(self):
        """Overload the length operator to give out the number of code units"""
        return len(self.code_units)

    def __getitem__(self, key):
        """Overload the [] operator to give out code units"""
        return self.code_units[key]

    def emulate(self, emulator):
        """Delegate emulation to the decoded opcode object."""
        self.instruction.emulate(emulator)
# Populate DexInstruction.opcode_defs at import time so decode() can map
# opcode values to their decoder classes.
DexInstruction.initialize()
def get_percentage(part, total):
    """Return ``part`` as a percentage of ``total`` (a float).

    An empty total yields 0.0 rather than raising ZeroDivisionError, so
    callers summarizing inputs with nothing to count still get a printable
    value.
    """
    if not total:
        return 0.0
    return (float(part) / float(total)) * 100.0
def print_code_stats(size, total_size, file_size):
    """Print how many code bytes a more efficient opcode encoding would save.

    ``size`` is the number of bytes that could be saved, ``total_size`` the
    total code bytes and ``file_size`` the size of the file(s) examined.
    """
    code_savings = get_percentage(size, total_size)
    file_savings = get_percentage(size, file_size)
    message = (
        "error: %u of %u code bytes (%u file bytes) " % (size, total_size, file_size)
        + "could be saved by encoding opcodes more efficiently "
        + "(%2.2f%% code savings, %2.2f%% file savings).\n"
        % (code_savings, file_savings)
    )
    print(message)
def print_debug_stats(size, file_size):
    """Print how many debug-info bytes a more efficient encoding would save."""
    file_savings = get_percentage(size, file_size)
    message = (
        "error: %u debug info bytes of %u file " % (size, file_size)
        + "bytes could be saved by encoding debug info more "
        + "efficiently (%2.2f%% file savings).\n" % (file_savings)
    )
    print(message)
def print_encoding_stats(size, total_size, file_size):
    """Print the savings achievable with the proposed new opcode encodings.

    ``size`` is the number of bytes that could be saved, ``total_size`` the
    total code bytes and ``file_size`` the size of the file(s) examined.
    """
    code_savings = get_percentage(size, total_size)
    file_savings = get_percentage(size, file_size)
    # The first fragment used to end with "could be saved " while the second
    # started with the same words, duplicating the phrase in the output.
    print("%u of %u code bytes " % (size, total_size), end="")
    print("could be saved by encoding opcodes more efficiently ", end="")
    print(
        "(%2.2f%% code savings, %2.2f%% file savings).\n" % (code_savings, file_savings)
    )
class DexEmulator(object):
    """Bare-bones emulator state: a program counter and a register map."""

    def __init__(self):
        self.pc = 0
        # Maps register number -> last value written to it.
        self.registers = {}

    def read_register(self, reg):
        """Return the value stored in ``reg``.

        Raises:
            ValueError: if ``reg`` has never been written.
        """
        if reg not in self.registers:
            raise ValueError("reading register with no value")
        return self.registers[reg]

    def write_register(self, reg, value):
        """Store ``value`` in ``reg``, replacing any previous value."""
        self.registers[reg] = value

    def emulate(self, uint16_array):
        """Placeholder; currently does nothing and returns None."""
        return None
def main():
    """Parse command-line options, then dump and/or analyze each DEX input.

    Inputs may be plain ``.dex`` files, ``.apk``/``.jar``/``.zip`` archives
    (``classes.dex``, ``classes2.dex``, ... are processed in order), or
    ``archive!entry.dex`` paths that address one entry inside an archive.
    Per-file reports are printed as each file is processed; opcode
    statistics and cross-file totals are printed after the last file.
    """
    usage = "Usage: dex.py [options] [dex file(s)]"
    parser = optparse.OptionParser(
        usage=usage, description="A script that parses DEX files."
    )

    def add_flag(*names, dest, help_text):
        # Every boolean switch shares the same shape: store_true, off by
        # default.
        parser.add_option(
            *names, action="store_true", dest=dest, help=help_text, default=False
        )

    add_flag("-v", "--verbose", dest="verbose", help_text="display verbose debug info")
    add_flag("-C", "--color", dest="color", help_text="Enable colorized output")
    add_flag("-a", "--all", dest="dump_all", help_text="Dump all DEX sections.")
    add_flag(
        "-H", "--header", dest="dump_header", help_text="Dump the DEX file header."
    )
    add_flag(
        "--map-list", dest="dump_map_list", help_text="Dump the DEX map list info."
    )
    add_flag("-s", "--strings", dest="dump_strings", help_text="Dump the DEX strings.")
    add_flag("-t", "--types", dest="dump_types", help_text="Dump the DEX types.")
    add_flag("-p", "--protos", dest="dump_protos", help_text="Dump the DEX protos.")
    add_flag("-f", "--fields", dest="dump_fields", help_text="Dump the DEX fields.")
    add_flag("-m", "--methods", dest="dump_methods", help_text="Dump the DEX methods.")
    add_flag(
        "--method-handles",
        dest="dump_method_handles",
        help_text="Dump the DEX method handles.",
    )
    add_flag(
        "--class-list",
        dest="dump_class_list",
        help_text="Dump the list of DEX classes.",
    )
    add_flag("--classes", dest="dump_classes", help_text="Dump the DEX classes.")
    parser.add_option(
        "--class",
        dest="class_filter",
        help="Find a class by name. Accepts `Lpath/to/Class;` or `path.to.Class`",
        default=None,
    )
    parser.add_option(
        "--method",
        dest="method_filter",
        help="Find a method by name. Must be used with --class",
        default=None,
    )
    add_flag(
        "--call-sites", dest="dump_call_sites", help_text="Dump the DEX call sites."
    )
    add_flag(
        "--code",
        dest="dump_code",
        help_text="Dump the DEX code in all class methods.",
    )
    add_flag(
        "--code-items", dest="dump_code_items", help_text="Dump the DEX code items."
    )
    add_flag(
        "--code-duplication",
        dest="code_duplication",
        help_text="Dump any methods in the DEX file that have the same instructions.",
    )
    add_flag(
        "--debug-info",
        dest="dump_debug_info",
        help_text="Dump the DEX debug info for each method.",
    )
    add_flag(
        "--debug-info-items",
        dest="dump_debug_info_items",
        help_text="Dump the DEX debug info items pointed to in its map_list",
    )
    add_flag("--stats", dest="dump_stats", help_text="Dump the DEX opcode statistics.")
    add_flag(
        "--check-encoding",
        dest="check_encoding",
        help_text="Verify opcodes are efficiently encoded.",
    )
    add_flag(
        "--new-encoding",
        dest="new_encoding",
        help_text="Report byte savings from potential new encodings.",
    )
    parser.add_option(
        "--proguard",
        dest="proguard",
        help="Specify a proguard file to use for demangling.",
        default=None,
    )
    add_flag(
        "--skip-abstract",
        dest="skip_abstract",
        help_text="Don't print information coming from abstract"
        " classes when passing --code, --debug or --all.",
    )
    add_flag("--counts", dest="dump_counts", help_text="Dump the DEX opcode counts")
    # Deliberately no default here: the option stays None unless passed,
    # exactly as before.
    parser.add_option(
        "--use-bytecode-format",
        action="store_true",
        dest="use_bytecode_format",
        help="When passed, switch from java to bytecode format.",
    )
    add_flag(
        "--public-only",
        dest="public_only",
        help_text="Only dump classes / methods / fields that are public",
    )
    add_flag(
        "--dump-structure",
        dest="dump_structure",
        help_text="Dumps just the names of all classes / methods / fields",
    )
    (options, files) = parser.parse_args()

    total_code_bytes_inefficiently_encoded = 0
    total_debug_info_bytes_inefficiently_encoded = 0
    total_new_code_bytes_inefficiently_encoded = 0
    total_opcode_byte_size = 0
    total_file_size = 0
    op_name_to_size = {}
    op_name_to_count = {}
    string_counts = {}
    # Number of DEX files actually processed. The old counter was never
    # incremented, so the cross-file totals at the end never printed.
    file_count = 0

    if not files:
        print("No input files. {}".format(usage))
        return

    def handle_zip(zip_file, path, name):
        # Naive implementation uses ZipFile entries which are file-like:
        #   info = zip_file.getinfo(name)
        #   return (path, info.file_size, file.open(info))
        # Problem is that performance is abysmal. So we unpack into
        # memory.
        info = zip_file.getinfo(name)
        data = zip_file.read(info)
        return (path, info.file_size, io.BytesIO(data))

    def generate_dex_objects(files):
        """Yield ``(display_path, size, file_like)`` for every DEX input.

        Archives and plain files are opened with ``with`` so their handles
        are released once the consumer asks for the next item (or abandons
        the generator). Iteration stops at the first input that cannot be
        handled.
        """
        for path in files:
            base = os.path.basename(path)
            ext = os.path.splitext(path)[1]

            # Special handling for direct zip access.
            if "!" in base and ext == ".dex":
                bang = base.find("!")
                zip_path = os.path.join(os.path.dirname(path), base[0:bang])
                name = base[bang + 1 :]
                with zipfile.ZipFile(zip_path, "r") as zip_file:
                    if name not in set(zip_file.namelist()):
                        print("%s does not contain %s" % (zip_path, name))
                        break
                    # The entry is copied into memory, so the archive can
                    # be closed before the entry is handed out.
                    entry = handle_zip(zip_file, path, name)
                yield entry
                continue

            if ext == ".dex":
                # Plain dex file, open as file.
                with open(path, "rb") as dex_file:
                    yield (path, os.path.getsize(path), dex_file)
                continue

            if ext == ".apk" or ext == ".jar" or ext == ".zip":
                with zipfile.ZipFile(path, "r") as zip_file:
                    names = set(zip_file.namelist())
                    if "classes.dex" not in names:
                        print("%s does not contain classes.dex" % path)
                        break
                    yield handle_zip(zip_file, path + "!classes.dex", "classes.dex")
                    for dex_number in range(2, 100000):
                        name = "classes%d.dex" % dex_number
                        if name not in names:
                            break
                        yield handle_zip(zip_file, path + "!" + name, name)
                continue

            print("error: dex.py does not know how to handle %s" % path)
            break

    for path, file_size, file_like in generate_dex_objects(files):
        file_count += 1
        print("Dex file: %s" % (path))
        total_file_size += file_size
        dex = File(path, file_like, options.proguard, options.use_bytecode_format)
        if options.class_filter:
            dex_class = dex.find_class(options.class_filter)
            if dex_class:
                if options.method_filter is None:
                    dex_class.dump(options)
                for method in dex_class.get_methods():
                    method_name = method.get_name()
                    if options.method_filter:
                        if options.method_filter != method_name:
                            continue
                    method.dump(options)
            else:
                print(
                    'error: class definition not found for "%s"'
                    % (options.class_filter)
                )
        if options.dump_classes:
            for dex_class in dex.get_classes():
                dex_class.dump(options)
                for method in dex_class.get_methods():
                    method.dump(options)
        if options.dump_header or options.dump_all:
            dex.dump_header(options)
        if options.dump_map_list or options.dump_all:
            dex.dump_map_list(options)
        if options.dump_debug_info_items or options.dump_all:
            dex.dump_debug_info_items(options)
        if options.dump_strings or options.dump_all:
            dex.dump_string_ids(options)
        if options.dump_types or options.dump_all:
            dex.dump_type_ids(options)
        if options.dump_protos or options.dump_all:
            dex.dump_proto_ids(options)
        if options.dump_fields or options.dump_all:
            dex.dump_field_ids(options)
        if options.dump_methods or options.dump_all:
            dex.dump_class_method_ids(options)
        if options.dump_class_list or options.dump_all:
            dex.dump_class_defs(options)
        if options.dump_call_sites or options.dump_all:
            dex.dump_call_site_ids(options)
        if options.dump_method_handles or options.dump_all:
            dex.dump_method_handle_items(options)
        if options.dump_code or options.dump_all:
            dex.dump_code(options)
        if options.dump_code_items:
            dex.dump_code_items(options)
        if options.dump_structure:
            dex.dump_structure(options)
        if (
            options.dump_stats
            or options.check_encoding
            or options.new_encoding
            or options.dump_counts
        ):
            if options.dump_stats:
                for string_item in dex.get_strings():
                    string_counts[string_item.data] = (
                        string_counts.get(string_item.data, 0) + 1
                    )
            code_bytes_inefficiently_encoded = 0
            debug_info_bytes_inefficiently_encoded = 0
            new_code_bytes_inefficiently_encoded = 0
            file_opcodes_byte_size = 0
            classes = dex.get_classes()
            used_code_item_indexes = []
            for cls in classes:
                methods = cls.get_methods()
                for method in methods:
                    opcodes_bytes_size = method.get_code_byte_size()
                    file_opcodes_byte_size += opcodes_bytes_size
                    total_opcode_byte_size += opcodes_bytes_size
                    for dex_inst in method.get_instructions():
                        if options.dump_stats:
                            op_name = dex_inst.get_name()
                            size = dex_inst.get_num_code_units() * 2
                            op_name_to_size[op_name] = (
                                op_name_to_size.get(op_name, 0) + size
                            )
                        if options.dump_counts:
                            op_name = dex_inst.get_name()
                            op_name_to_count[op_name] = (
                                op_name_to_count.get(op_name, 0) + 1
                            )
                        if options.check_encoding:
                            code_bytes_inefficiently_encoded += (
                                dex_inst.check_encoding()
                            )
                        if options.new_encoding:
                            new_code_bytes_inefficiently_encoded += (
                                dex_inst.new_encoding()
                            )
                    if options.check_encoding:
                        code_item_idx = method.get_code_item_index()
                        if code_item_idx >= 0:
                            used_code_item_indexes.append(code_item_idx)
                        debug_info = method.get_debug_info()
                        if debug_info:
                            debug_info_bytes_inefficiently_encoded += (
                                method.check_debug_info_encoding()
                            )
            if options.check_encoding:
                efficiently_encoded = True
                if code_bytes_inefficiently_encoded > 0:
                    efficiently_encoded = False
                    total_code_bytes_inefficiently_encoded += (
                        code_bytes_inefficiently_encoded
                    )
                    print_code_stats(
                        code_bytes_inefficiently_encoded,
                        file_opcodes_byte_size,
                        file_size,
                    )
                if debug_info_bytes_inefficiently_encoded > 0:
                    efficiently_encoded = False
                    total_debug_info_bytes_inefficiently_encoded += (
                        debug_info_bytes_inefficiently_encoded
                    )
                    print_debug_stats(debug_info_bytes_inefficiently_encoded, file_size)
                # Verify that all code items are used: walk the sorted
                # referenced indexes and report every index skipped between
                # them. The previous check treated any pair of consecutive
                # indexes as a gap (marking fully-used files as inefficient)
                # and could never report index 0.
                # NOTE(review): assumes code item indexes are 0-based.
                next_expected_idx = 0
                for ci_idx in sorted(used_code_item_indexes):
                    if ci_idx > next_expected_idx:
                        efficiently_encoded = False
                        for idx in range(next_expected_idx, ci_idx):
                            print(
                                "code_item[%u] is not used and its "
                                "code_item can be removed" % (idx)
                            )
                    next_expected_idx = max(next_expected_idx, ci_idx + 1)
                if efficiently_encoded:
                    print("file is efficiently encoded.")
            if options.new_encoding:
                if new_code_bytes_inefficiently_encoded > 0:
                    total_new_code_bytes_inefficiently_encoded += (
                        new_code_bytes_inefficiently_encoded
                    )
                    print_encoding_stats(
                        new_code_bytes_inefficiently_encoded,
                        file_opcodes_byte_size,
                        file_size,
                    )
                else:
                    print("file is efficiently encoded.")
        if options.code_duplication:
            dex.report_code_duplication()

    if options.dump_stats:
        duped_strings_byte_size = 0
        for s, count in string_counts.items():
            if count > 1:
                s_len = len(s)
                # Every redundant copy costs its payload plus its uleb128
                # length prefix; the prefix used to be counted only once
                # because of operator precedence.
                duped_strings_byte_size += (count - 1) * (
                    s_len + get_uleb128_byte_size(s_len)
                )
        if duped_strings_byte_size > 0:
            print(
                "%u bytes in duplicated strings across dex files."
                % (duped_strings_byte_size)
            )

        print("BYTESIZE %AGE OPCODE")
        print("======== ===== =================================")
        sorted_x = sorted(op_name_to_size.items(), key=operator.itemgetter(1))
        for op_name, byte_size in sorted_x:
            percentage = get_percentage(byte_size, total_opcode_byte_size)
            print("%-8u %5.2f %s" % (byte_size, percentage, op_name))
        print("-------- ----- ---------------------------------")
        print("%-8u 100.0" % (total_opcode_byte_size))

    if options.dump_counts:
        print("COUNT OPCODE")
        print("======== =================================")
        for op_name, count in op_name_to_count.items():
            print("%-8u %s" % (count, op_name))

    # Cross-file totals only add information when more than one file was
    # processed; a single file has already printed the same numbers.
    if file_count > 1:
        if options.check_encoding:
            if total_code_bytes_inefficiently_encoded > 0:
                print_code_stats(
                    total_code_bytes_inefficiently_encoded,
                    total_opcode_byte_size,
                    total_file_size,
                )
            if total_debug_info_bytes_inefficiently_encoded > 0:
                print_debug_stats(
                    total_debug_info_bytes_inefficiently_encoded, total_file_size
                )
        if options.new_encoding:
            invoke_total = can_use_new_encoding + cant_use_new_encoding
            # Guard locally: inputs without any invoke-kind opcode would
            # otherwise divide by zero.
            if invoke_total:
                invoke_kind_percentage = get_percentage(
                    can_use_new_encoding, invoke_total
                )
            else:
                invoke_kind_percentage = 0.0
            # One print with an explicit separator; the two fragments used
            # to run together ("...new encoding12 could not").
            print(
                "%u invoke-kind opcodes could use new encoding, "
                "%u could not (%2.2f%%)"
                % (can_use_new_encoding, cant_use_new_encoding, invoke_kind_percentage)
            )
            if total_new_code_bytes_inefficiently_encoded > 0:
                print_encoding_stats(
                    total_new_code_bytes_inefficiently_encoded,
                    total_opcode_byte_size,
                    total_file_size,
                )
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()