in tools/python/dex.py [0:0]
def _build_option_parser(usage):
    """Create the optparse parser describing every dex.py command line option.

    Options are registered in the order they should appear in ``--help``.
    """
    parser = optparse.OptionParser(
        usage=usage, description="A script that parses DEX files."
    )

    def add_flag(flags, dest, help_text):
        # Every boolean switch is a store_true option that defaults to False.
        parser.add_option(
            *flags, action="store_true", dest=dest, help=help_text, default=False
        )

    add_flag(("-v", "--verbose"), "verbose", "display verbose debug info")
    add_flag(("-C", "--color"), "color", "Enable colorized output")
    add_flag(("-a", "--all"), "dump_all", "Dump all DEX sections.")
    add_flag(("-H", "--header"), "dump_header", "Dump the DEX file header.")
    add_flag(("--map-list",), "dump_map_list", "Dump the DEX map list info.")
    add_flag(("-s", "--strings"), "dump_strings", "Dump the DEX strings.")
    add_flag(("-t", "--types"), "dump_types", "Dump the DEX types.")
    add_flag(("-p", "--protos"), "dump_protos", "Dump the DEX protos.")
    add_flag(("-f", "--fields"), "dump_fields", "Dump the DEX fields.")
    add_flag(("-m", "--methods"), "dump_methods", "Dump the DEX methods.")
    add_flag(
        ("--method-handles",), "dump_method_handles", "Dump the DEX method handles."
    )
    add_flag(("--class-list",), "dump_class_list", "Dump the list of DEX classes.")
    add_flag(("--classes",), "dump_classes", "Dump the DEX classes.")
    parser.add_option(
        "--class",
        dest="class_filter",
        help="Find a class by name. Accepts `Lpath/to/Class;` or `path.to.Class`",
        default=None,
    )
    parser.add_option(
        "--method",
        dest="method_filter",
        help="Find a method by name. Must be used with --class",
        default=None,
    )
    add_flag(("--call-sites",), "dump_call_sites", "Dump the DEX call sites.")
    add_flag(("--code",), "dump_code", "Dump the DEX code in all class methods.")
    add_flag(("--code-items",), "dump_code_items", "Dump the DEX code items.")
    add_flag(
        ("--code-duplication",),
        "code_duplication",
        "Dump any methods in the DEX file that have the same instructions.",
    )
    add_flag(
        ("--debug-info",),
        "dump_debug_info",
        "Dump the DEX debug info for each method.",
    )
    add_flag(
        ("--debug-info-items",),
        "dump_debug_info_items",
        "Dump the DEX debug info items pointed to in its map_list",
    )
    add_flag(("--stats",), "dump_stats", "Dump the DEX opcode statistics.")
    add_flag(
        ("--check-encoding",),
        "check_encoding",
        "Verify opcodes are efficiently encoded.",
    )
    add_flag(
        ("--new-encoding",),
        "new_encoding",
        "Report byte savings from potential new encodings.",
    )
    parser.add_option(
        "--proguard",
        dest="proguard",
        help="Specify a progard file to use for demangling.",
        default=None,
    )
    add_flag(
        ("--skip-abstract",),
        "skip_abstract",
        "Don't print information coming from abstract"
        " classes when passing --code, --debug or --all.",
    )
    add_flag(("--counts",), "dump_counts", "Dump the DEX opcode counts")
    # Deliberately registered without a default: the option value stays None
    # (not False) unless the switch is passed.
    parser.add_option(
        "--use-bytecode-format",
        action="store_true",
        dest="use_bytecode_format",
        help="When passed, switch from java to bytecode format.",
    )
    add_flag(
        ("--public-only",),
        "public_only",
        "Only dump classes / methods / fields that are public",
    )
    add_flag(
        ("--dump-structure",),
        "dump_structure",
        "Dumps just the names of all classes / methods / fields",
    )
    return parser


def _read_zip_entry(zip_file, path, name):
    """Return ``(path, uncompressed size, BytesIO)`` for entry *name* of *zip_file*."""
    # Naive implementation uses ZipFile entries which are file-like:
    #   info = zip_file.getinfo(name)
    #   return (path, info.file_size, zip_file.open(info))
    # Problem is that performance is abysmal. So we unpack into memory.
    info = zip_file.getinfo(name)
    return (path, info.file_size, io.BytesIO(zip_file.read(info)))


def _generate_dex_objects(files):
    """Yield ``(display path, byte size, binary file-like)`` for each DEX input.

    Supported inputs are plain ``.dex`` files, ``archive!entry.dex`` paths that
    address one entry of a zip archive, and ``.apk`` / ``.jar`` / ``.zip``
    archives, which are expanded to ``classes.dex``, ``classes2.dex``, ... .
    The first input that cannot be handled prints a message and stops the
    iteration, so later inputs are not processed. The consumer is responsible
    for closing each yielded file-like object.
    """
    for path in files:
        base = os.path.basename(path)
        ext = os.path.splitext(path)[1]

        if "!" in base and ext == ".dex":
            # Special handling for direct zip access.
            zip_name, _, name = base.partition("!")
            zip_path = os.path.join(os.path.dirname(path), zip_name)
            with zipfile.ZipFile(zip_path, "r") as zip_file:
                if name not in set(zip_file.namelist()):
                    print("%s does not contain %s" % (zip_path, name))
                    break
                yield _read_zip_entry(zip_file, path, name)
        elif ext == ".dex":
            # Plain dex file, open as file.
            yield (path, os.path.getsize(path), open(path, "rb"))
        elif ext in (".apk", ".jar", ".zip"):
            with zipfile.ZipFile(path, "r") as zip_file:
                names = set(zip_file.namelist())
                if "classes.dex" not in names:
                    print("%s does not contain classes.dex" % path)
                    break
                yield _read_zip_entry(zip_file, path + "!classes.dex", "classes.dex")
                # Secondary dex files are numbered consecutively from 2; stop
                # at the first missing number.
                for dex_number in range(2, 100000):
                    name = "classes%d.dex" % dex_number
                    if name not in names:
                        break
                    yield _read_zip_entry(zip_file, path + "!" + name, name)
        else:
            print("error: dex.py does not know how to handle %s" % path)
            break


def _dump_requested_sections(dex, options):
    """Print every DEX section selected on the command line for one file."""
    if options.class_filter:
        dex_class = dex.find_class(options.class_filter)
        if dex_class:
            # The class itself is only dumped when no method was singled out.
            if options.method_filter is None:
                dex_class.dump(options)
            for method in dex_class.get_methods():
                method_name = method.get_name()
                if options.method_filter and options.method_filter != method_name:
                    continue
                method.dump(options)
        else:
            print(
                'error: class definition not found for "%s"' % (options.class_filter)
            )
    if options.dump_classes:
        for dex_class in dex.get_classes():
            dex_class.dump(options)
            for method in dex_class.get_methods():
                method.dump(options)
    if options.dump_header or options.dump_all:
        dex.dump_header(options)
    if options.dump_map_list or options.dump_all:
        dex.dump_map_list(options)
    if options.dump_debug_info_items or options.dump_all:
        dex.dump_debug_info_items(options)
    if options.dump_strings or options.dump_all:
        dex.dump_string_ids(options)
    if options.dump_types or options.dump_all:
        dex.dump_type_ids(options)
    if options.dump_protos or options.dump_all:
        dex.dump_proto_ids(options)
    if options.dump_fields or options.dump_all:
        dex.dump_field_ids(options)
    if options.dump_methods or options.dump_all:
        dex.dump_class_method_ids(options)
    if options.dump_class_list or options.dump_all:
        dex.dump_class_defs(options)
    if options.dump_call_sites or options.dump_all:
        dex.dump_call_site_ids(options)
    if options.dump_method_handles or options.dump_all:
        dex.dump_method_handle_items(options)
    if options.dump_code or options.dump_all:
        dex.dump_code(options)
    # Code items and structure are not part of --all.
    if options.dump_code_items:
        dex.dump_code_items(options)
    if options.dump_structure:
        dex.dump_structure(options)


def main():
    """Command line entry point: parse options and dump/analyze each DEX input.

    For every DEX file found in the inputs, the requested sections are printed
    and, when --stats, --counts, --check-encoding or --new-encoding is given,
    per-instruction statistics are gathered. Aggregated statistics are printed
    after all inputs were processed; encoding totals are only printed when more
    than one DEX file was processed.
    """
    usage = "Usage: dex.py [options] [dex file(s)]"
    parser = _build_option_parser(usage)
    (options, files) = parser.parse_args()

    total_code_bytes_inefficiently_encoded = 0
    total_debug_info_bytes_inefficiently_encoded = 0
    total_new_code_bytes_inefficiently_encoded = 0
    total_opcode_byte_size = 0
    total_file_size = 0
    op_name_to_size = {}
    op_name_to_count = {}
    string_counts = {}
    num_dex_files = 0

    if not files:
        print("No input files. {}".format(usage))
        return

    collect_stats = (
        options.dump_stats
        or options.check_encoding
        or options.new_encoding
        or options.dump_counts
    )

    for path, file_size, file_like in _generate_dex_objects(files):
        # `dex` is only used inside this iteration, so the input stream is
        # closed as soon as the file has been handled.
        with file_like:
            num_dex_files += 1
            print("Dex file: %s" % (path))
            total_file_size += file_size
            dex = File(path, file_like, options.proguard, options.use_bytecode_format)
            _dump_requested_sections(dex, options)

            if collect_stats:
                if options.dump_stats:
                    for string_item in dex.get_strings():
                        string_counts[string_item.data] = (
                            string_counts.get(string_item.data, 0) + 1
                        )
                code_bytes_inefficiently_encoded = 0
                debug_info_bytes_inefficiently_encoded = 0
                new_code_bytes_inefficiently_encoded = 0
                file_opcodes_byte_size = 0
                used_code_item_indexes = []
                for cls in dex.get_classes():
                    for method in cls.get_methods():
                        opcodes_bytes_size = method.get_code_byte_size()
                        file_opcodes_byte_size += opcodes_bytes_size
                        total_opcode_byte_size += opcodes_bytes_size
                        for dex_inst in method.get_instructions():
                            if options.dump_stats:
                                op_name = dex_inst.get_name()
                                # Instruction size in bytes: 2 per code unit.
                                size = dex_inst.get_num_code_units() * 2
                                op_name_to_size[op_name] = (
                                    op_name_to_size.get(op_name, 0) + size
                                )
                            if options.dump_counts:
                                op_name = dex_inst.get_name()
                                op_name_to_count[op_name] = (
                                    op_name_to_count.get(op_name, 0) + 1
                                )
                            if options.check_encoding:
                                code_bytes_inefficiently_encoded += (
                                    dex_inst.check_encoding()
                                )
                            if options.new_encoding:
                                new_code_bytes_inefficiently_encoded += (
                                    dex_inst.new_encoding()
                                )
                        if options.check_encoding:
                            code_item_idx = method.get_code_item_index()
                            if code_item_idx >= 0:
                                used_code_item_indexes.append(code_item_idx)
                            debug_info = method.get_debug_info()
                            if debug_info:
                                debug_info_bytes_inefficiently_encoded += (
                                    method.check_debug_info_encoding()
                                )

                if options.check_encoding:
                    efficiently_encoded = True
                    if code_bytes_inefficiently_encoded > 0:
                        efficiently_encoded = False
                        total_code_bytes_inefficiently_encoded += (
                            code_bytes_inefficiently_encoded
                        )
                        print_code_stats(
                            code_bytes_inefficiently_encoded,
                            file_opcodes_byte_size,
                            file_size,
                        )
                    if debug_info_bytes_inefficiently_encoded > 0:
                        efficiently_encoded = False
                        total_debug_info_bytes_inefficiently_encoded += (
                            debug_info_bytes_inefficiently_encoded
                        )
                        print_debug_stats(
                            debug_info_bytes_inefficiently_encoded, file_size
                        )
                    # Verify that all code items are used. Only a real gap
                    # between consecutive used indexes counts as an
                    # inefficiency; adjacent indexes leave nothing unused.
                    # NOTE(review): an unused code item 0 is never reported
                    # because the scan starts after index 0 -- confirm whether
                    # that is intended.
                    used_code_item_indexes.sort()
                    prev_ci_idx = 0
                    for ci_idx in used_code_item_indexes:
                        unused_indexes = range(prev_ci_idx + 1, ci_idx)
                        if len(unused_indexes) > 0:
                            efficiently_encoded = False
                            for idx in unused_indexes:
                                print(
                                    "code_item[%u] is not used and its "
                                    "code_item can be removed" % (idx)
                                )
                        prev_ci_idx = ci_idx
                    if efficiently_encoded:
                        print("file is efficiently encoded.")

                if options.new_encoding:
                    if new_code_bytes_inefficiently_encoded > 0:
                        total_new_code_bytes_inefficiently_encoded += (
                            new_code_bytes_inefficiently_encoded
                        )
                        print_encoding_stats(
                            new_code_bytes_inefficiently_encoded,
                            file_opcodes_byte_size,
                            file_size,
                        )
                    else:
                        print("file is efficiently encoded.")

            if options.code_duplication:
                dex.report_code_duplication()

    if options.dump_stats:
        duped_strings_byte_size = 0
        for s, count in string_counts.items():
            if count > 1:
                s_len = len(s)
                # NOTE(review): as written the uleb128 length prefix is counted
                # once per duplicated string, not once per extra copy --
                # confirm the intended precedence.
                duped_strings_byte_size += (count - 1) * s_len + get_uleb128_byte_size(
                    s_len
                )
        if duped_strings_byte_size > 0:
            print(
                "%u bytes in duplicated strings across dex files."
                % (duped_strings_byte_size)
            )
        print("BYTESIZE %AGE OPCODE")
        print("======== ===== =================================")
        # Smallest contributors first.
        sorted_x = sorted(op_name_to_size.items(), key=operator.itemgetter(1))
        for (op_name, byte_size) in sorted_x:
            percentage = get_percentage(byte_size, total_opcode_byte_size)
            print("%-8u %5.2f %s" % (byte_size, percentage, op_name))
        print("-------- ----- ---------------------------------")
        print("%-8u 100.0" % (total_opcode_byte_size))

    if options.dump_counts:
        print("COUNT OPCODE")
        print("======== =================================")
        for op_name, count in op_name_to_count.items():
            print("%-8u %s" % (count, op_name))

    # Totals only add information when more than one DEX file was processed.
    if num_dex_files > 1:
        if options.check_encoding:
            if total_code_bytes_inefficiently_encoded > 0:
                print_code_stats(
                    total_code_bytes_inefficiently_encoded,
                    total_opcode_byte_size,
                    total_file_size,
                )
            if total_debug_info_bytes_inefficiently_encoded > 0:
                print_debug_stats(
                    total_debug_info_bytes_inefficiently_encoded, total_file_size
                )
        if options.new_encoding:
            invoke_kind_total = can_use_new_encoding + cant_use_new_encoding
            # Guard against a zero total; get_percentage is defined elsewhere
            # and is presumably a plain division -- TODO confirm.
            if invoke_kind_total > 0:
                invoke_kind_percentage = get_percentage(
                    can_use_new_encoding, invoke_kind_total
                )
            else:
                invoke_kind_percentage = 0.0
            # NOTE(review): end="" joins the two messages without a separator;
            # the text is kept as-is here.
            print(
                "%u invoke-kind opcodes could use new encoding"
                % (can_use_new_encoding),
                end="",
            )
            print(
                "%u could not (%2.2f%%)"
                % (cant_use_new_encoding, invoke_kind_percentage)
            )
            if total_new_code_bytes_inefficiently_encoded > 0:
                print_encoding_stats(
                    total_new_code_bytes_inefficiently_encoded,
                    total_opcode_byte_size,
                    total_file_size,
                )