opt/optimize_enums/EnumTransformer.cpp (1,133 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include "EnumTransformer.h" #include "Creators.h" #include "DexAsm.h" #include "DexClass.h" #include "EnumUpcastAnalysis.h" #include "Mutators.h" #include "OptData.h" #include "Resolver.h" #include "Show.h" #include "StlUtil.h" #include "TypeReference.h" #include "UsedVarsAnalysis.h" #include "Walkers.h" /** * We already get a set of candidate enums which are safe to be replaced with * Integer objects from EnumUpcastAnalysis, we do the transformation in the * EnumTransformer in following steps. * 1. Create an enum helper class LEnumUtils; with some helper methods and * singleton Integer fields, Integer f0, f1, f2 .... * 2. Update instructions. * -- invoke-virtual LCandidateEnum;.ordinal()I => * Ljava/lang/Integer;.intValue:()I * -- invoke-static LCandidateEnum;.values():[LCandidateEnum; => * LEnumUtils;.values(I)[Integer * -- invoke-virtual LCandidateEnum;.compareTo:(Object)I => * Ljava/lang/Integer;.compareTo:(Integer)I * -- invoke-virtual LCandidateEnum;.equals:(Object)Z => * Ljava/lang/Integer;.equals:(Object)Z * -- sget-object LCandidateEnum;.f:LCandidateEnum; => * LEnumUtils;.f?:Ljava/lang/Integer; * or construct a new integer if the enum is allowed to be optimized * unsafely. 
* -- invoke-virtual LCandidateEnum;.name:()String =>
 *                    LCandidateEnum;.redex$OE$name:(Integer)String
 * -- invoke-virtual LCandidateEnum;.hashCode:()I =>
 *                    LCandidateEnum;.redex$OE$hashCode:(Integer)I
 * -- invoke-static LCandidateEnum;.valueOf:(String)LCandidateEnum; =>
 *                    LCandidateEnum;.redex$OE$valueOf:(String)Integer
 *
 *    If CandidateEnum.toString() overrides Enum.toString()
 * -- invoke-virtual LCandidateEnum;.toString:()String =>
 *                    LCandidateEnum;.toString$REDEX$YCYa1bLthVk:(Integer)String
 *    otherwise
 * -- invoke-virtual LCandidateEnum;.toString:()String =>
 *                    LCandidateEnum;.redex$OE$name:(Integer)String
 *
 *    We also make all virtual methods and instance direct methods to be static
 *    and keep them in their original class while also changing their
 *    invocations to static.
 * 3. Clean up the static fields of candidate enums and update these enum
 *    classes to inherit java.lang.Object instead of java.lang.Enum.
 * 4. Update specs of methods and fields based on name mangling.
 */

namespace {

using namespace optimize_enums;
using namespace dex_asm;
using EnumAttributeMap = std::unordered_map<DexType*, EnumAttributes>;
namespace ptrs = local_pointers;

/**
 * A structure holding the enum utils and constant values.
 *
 * It owns the bookkeeping shared by all per-method code transformations: the
 * Integer fields of the generated helper class, the helper methods that were
 * requested while rewriting code, and refs to well-known types and methods.
 */
struct EnumUtil {
  // Integer fields (f0, f1, ...) of the generated helper class. Filled by
  // make_enumutils_class() in index order and looked up by enum ordinal when
  // rewriting sget-object instructions.
  std::vector<DexFieldRef*> m_fields;

  // Store the needed helper methods for toString(), valueOf() and other
  // invocations at Code transformation phase, then implement these methods
  // later.
  ConcurrentSet<DexMethodRef*> m_substitute_methods;

  // Store virtual and direct methods of candidate enums that will be
  // made static later.
  ConcurrentSet<DexMethod*> m_instance_methods;

  // Store methods for getting instance fields to be generated later.
ConcurrentMap<DexFieldRef*, DexMethodRef*> m_get_instance_field_methods;

  // Ref to the helper class's values(I) method; assigned by
  // make_enumutils_class().
  DexMethodRef* m_values_method_ref = nullptr;

  // Pass configuration; read for `max_enum_size` and for
  // `breaking_reference_equality_allowlist`.
  const Config& m_config;

  // Method names that are matched against or used for generated helpers.
  const DexString* CLINIT_METHOD_STR = DexString::make_string("<clinit>");
  const DexString* REDEX_NAME = DexString::make_string("redex$OE$name");
  const DexString* REDEX_HASHCODE = DexString::make_string("redex$OE$hashCode");
  const DexString* REDEX_STRING_VALUEOF =
      DexString::make_string("redex$OE$String_valueOf");
  const DexString* REDEX_VALUEOF = DexString::make_string("redex$OE$valueOf");
  const DexString* INIT_METHOD_STR = DexString::make_string("<init>");
  const DexString* VALUES_METHOD_STR = DexString::make_string("values");
  const DexString* VALUEOF_METHOD_STR = DexString::make_string("valueOf");

  // Well-known types.
  const DexType* ENUM_TYPE = type::java_lang_Enum();
  DexType* INT_TYPE = type::_int();
  DexType* INTEGER_TYPE = type::java_lang_Integer();
  DexType* OBJECT_TYPE = type::java_lang_Object();
  DexType* STRING_TYPE = type::java_lang_String();
  DexType* SERIALIZABLE_TYPE = DexType::make_type("Ljava/io/Serializable;");
  DexType* COMPARABLE_TYPE = DexType::make_type("Ljava/lang/Comparable;");
  DexType* RTEXCEPTION_TYPE =
      DexType::make_type("Ljava/lang/RuntimeException;");
  DexType* ILLEGAL_ARG_EXCP_TYPE =
      DexType::make_type("Ljava/lang/IllegalArgumentException;");

  // Methods whose invocations are recognized while rewriting code.
  const DexMethodRef* ENUM_ORDINAL_METHOD = method::java_lang_Enum_ordinal();
  const DexMethodRef* ENUM_EQUALS_METHOD =
      DexMethod::make_method("Ljava/lang/Enum;.equals:(Ljava/lang/Object;)Z");
  const DexMethodRef* ENUM_COMPARETO_METHOD =
      DexMethod::make_method("Ljava/lang/Enum;.compareTo:(Ljava/lang/Enum;)I");
  const DexMethodRef* ENUM_TOSTRING_METHOD =
      DexMethod::make_method("Ljava/lang/Enum;.toString:()Ljava/lang/String;");
  const DexMethodRef* ENUM_HASHCODE_METHOD =
      DexMethod::make_method("Ljava/lang/Enum;.hashCode:()I");
  const DexMethodRef* ENUM_NAME_METHOD = method::java_lang_Enum_name();
  const DexMethodRef* STRING_VALUEOF_METHOD = DexMethod::make_method(
      "Ljava/lang/String;.valueOf:(Ljava/lang/Object;)Ljava/lang/String;");
  const DexMethodRef* STRINGBUILDER_APPEND_OBJ_METHOD = DexMethod::make_method(
      "Ljava/lang/StringBuilder;.append:(Ljava/lang/Object;)Ljava/lang/"
      "StringBuilder;");

  // Method refs available for emitted replacement/helper code. Not all of
  // them are referenced in the part of the file visible here.
  DexMethodRef* STRING_HASHCODE_METHOD =
      DexMethod::make_method("Ljava/lang/String;.hashCode:()I");
  DexMethodRef* STRINGBUILDER_APPEND_STR_METHOD = DexMethod::make_method(
      "Ljava/lang/StringBuilder;.append:(Ljava/lang/String;)Ljava/lang/"
      "StringBuilder;");
  DexMethodRef* INTEGER_INTVALUE_METHOD = method::java_lang_Integer_intValue();
  DexMethodRef* INTEGER_EQUALS_METHOD = DexMethod::make_method(
      "Ljava/lang/Integer;.equals:(Ljava/lang/Object;)Z");
  DexMethodRef* INTEGER_COMPARETO_METHOD = DexMethod::make_method(
      "Ljava/lang/Integer;.compareTo:(Ljava/lang/Integer;)I");
  DexMethodRef* INTEGER_VALUEOF_METHOD = method::java_lang_Integer_valueOf();
  DexMethodRef* RTEXCEPTION_CTOR_METHOD = DexMethod::make_method(
      "Ljava/lang/RuntimeException;.<init>:(Ljava/lang/String;)V");
  DexMethodRef* ILLEGAL_ARG_CONSTRUCT_METHOD = DexMethod::make_method(
      "Ljava/lang/IllegalArgumentException;.<init>:(Ljava/lang/String;)V");
  DexMethodRef* STRING_EQ_METHOD =
      DexMethod::make_method("Ljava/lang/String;.equals:(Ljava/lang/Object;)Z");

  explicit EnumUtil(const Config& config) : m_config(config) {}

  /**
   * Create the helper class with min(fields_count, max_enum_size) Integer
   * fields and append it to the first dex of the first store.
   *
   * NOTE(review): (*stores)[0] and get_dexen()[0] are indexed without an
   * emptiness check -- presumably guaranteed by the caller; confirm.
   */
  void create_util_class(DexStoresVector* stores, uint32_t fields_count) {
    uint32_t fields_in_primary = std::min(fields_count, m_config.max_enum_size);
    DexClass* cls = make_enumutils_class(fields_in_primary);
    auto& dexen = (*stores)[0].get_dexen()[0];
    dexen.push_back(cls);
  }

  /**
   * Return true if `type` is java.lang.Enum, java.lang.Object,
   * java.io.Serializable or java.lang.Comparable.
   */
  bool is_super_type_of_candidate_enum(DexType* type) {
    return type == ENUM_TYPE || type == OBJECT_TYPE ||
           type == SERIALIZABLE_TYPE || type == COMPARABLE_TYPE;
  }

  /**
   * IF LCandidateEnum; is a candidate enum:
   *  LCandidateEnum; => Ljava/lang/Integer;
   *  [LCandidateEnum; => [Ljava/lang/Integer;
   *  [[LCandidateEnum; => [[Ljava/lang/Integer;
   *  ...
   * IF it is not a candidate enum, return nullptr.
*/
  DexType* try_convert_to_int_type(const EnumAttributeMap& enum_attributes_map,
                                   DexType* type) const {
    const uint32_t array_depth = type::get_array_level(type);
    // Plain types are looked up directly, arrays by their element type.
    DexType* base_type =
        array_depth == 0 ? type : type::get_array_element_type(type);
    if (enum_attributes_map.count(base_type) == 0) {
      return nullptr;
    }
    if (array_depth == 0) {
      return INTEGER_TYPE;
    }
    // Rebuild an Integer array with the same number of dimensions.
    return type::make_array_type(INTEGER_TYPE, array_depth);
  }

  /**
   * Register and return the method ref
   * LCandidateEnum;.redex$OE$String_valueOf:(Integer)String, which stands in
   * for String.valueOf:(Object) when the argument is a candidate enum object.
   *
   * The implementation of that substitute depends on the substitute of
   * LCandidateEnum;.toString:()String, so the latter is registered first.
   */
  DexMethodRef* add_substitute_of_stringvalueof(DexType* enum_type) {
    add_substitute_of_tostring(enum_type);
    auto arg_types = DexTypeList::make_type_list({INTEGER_TYPE});
    DexMethodRef* substitute =
        DexMethod::make_method(enum_type,
                               REDEX_STRING_VALUEOF,
                               DexProto::make_proto(STRING_TYPE, arg_types));
    m_substitute_methods.insert(substitute);
    return substitute;
  }

  /**
   * Register and return the method ref
   * LCandidateEnum;.redex$OE$valueOf(String):Integer, which stands in for
   * LCandidateEnum;.valueOf:(String)LCandidateEnum;.
   */
  DexMethodRef* add_substitute_of_valueof(DexType* enum_type) {
    auto arg_types = DexTypeList::make_type_list({STRING_TYPE});
    DexMethodRef* substitute =
        DexMethod::make_method(enum_type,
                               REDEX_VALUEOF,
                               DexProto::make_proto(INTEGER_TYPE, arg_types));
    m_substitute_methods.insert(substitute);
    return substitute;
  }

  /**
   * If `Enum.toString` is not overridden, return method ref to
   * LCandidateEnum;.redex$OE$name:(Integer)String, a substitute for
   * LCandidateEnum;.toString:()String. Otherwise return the overriding method.
   * Store the method ref at the same time.
*/
  DexMethodRef* add_substitute_of_tostring(DexType* enum_type) {
    DexMethod* method_ref =
        get_user_defined_tostring_method(type_class(enum_type));
    if (method_ref == nullptr) {
      // No user toString(): fall back to the name() substitute.
      return add_substitute_of_name(enum_type);
    }
    // The overriding toString() is expected to resolve as a virtual method.
    auto method = resolve_method(method_ref, MethodSearch::Virtual);
    always_assert(method);
    return method_ref;
  }

  /**
   * Same lookup as add_substitute_of_tostring() but without registering
   * anything: the overriding toString() if the enum defines one, otherwise
   * the ref to LCandidateEnum;.redex$OE$name:(Integer)String.
   */
  DexMethodRef* get_substitute_of_tostring(DexType* enum_type) {
    DexMethodRef* overriding =
        get_user_defined_tostring_method(type_class(enum_type));
    return overriding != nullptr ? overriding
                                 : get_substitute_of_name(enum_type);
  }

  /**
   * Register and return the method ref
   * LCandidateEnum;.redex$OE$name:(Integer)String, the substitute for
   * LCandidateEnum;.name:()String.
   */
  DexMethodRef* add_substitute_of_name(DexType* enum_type) {
    DexMethodRef* name_method = get_substitute_of_name(enum_type);
    m_substitute_methods.insert(name_method);
    return name_method;
  }

  /**
   * Build the method ref LCandidateEnum;.redex$OE$name:(Integer)String
   * without registering it.
   */
  DexMethodRef* get_substitute_of_name(DexType* enum_type) {
    auto arg_types = DexTypeList::make_type_list({INTEGER_TYPE});
    return DexMethod::make_method(
        enum_type, REDEX_NAME, DexProto::make_proto(STRING_TYPE, arg_types));
  }

  /**
   * Returns a method ref to LCandidateEnum;.redex$OE$hashCode:(Integer)I, a
   * substitute for LCandidateEnum;.hashCode:()I.
   * Store the method ref at the same time.
   */
  DexMethodRef* add_substitute_of_hashcode(DexType* enum_type) {
    // `redex$OE$hashCode()` uses `redex$OE$name()` so we better make sure
    // the method exists.
add_substitute_of_name(enum_type);
    auto method = get_substitute_of_hashcode(enum_type);
    m_substitute_methods.insert(method);
    return method;
  }

  /**
   * Returns a method ref to LCandidateEnum;.redex$OE$hashCode:(Integer)I
   * without registering it in m_substitute_methods.
   */
  DexMethodRef* get_substitute_of_hashcode(DexType* enum_type) {
    auto proto = DexProto::make_proto(
        INT_TYPE, DexTypeList::make_type_list({INTEGER_TYPE}));
    auto method = DexMethod::make_method(enum_type, REDEX_HASHCODE, proto);
    return method;
  }

  /**
   * Returns a method ref to LCandidateEnum;.redex$OE$get_iField:(Integer)X
   * where `X` is the type of the instance field `iField`.
   * Store the method ref at the same time.
   *
   * The ref is remembered per field in m_get_instance_field_methods, so a
   * second request for the same field returns the stored ref.
   */
  DexMethodRef* add_get_ifield_method(DexType* enum_type, DexFieldRef* ifield) {
    if (m_get_instance_field_methods.count(ifield)) {
      return m_get_instance_field_methods.at(ifield);
    }
    auto proto = DexProto::make_proto(
        ifield->get_type(), DexTypeList::make_type_list({INTEGER_TYPE}));
    // The getter name embeds the field name: redex$OE$get_<field>.
    auto method_name = DexString::make_string("redex$OE$get_" + ifield->str());
    auto method = DexMethod::make_method(enum_type, method_name, proto);
    m_get_instance_field_methods.insert(std::make_pair(ifield, method));
    return method;
  }

  /**
   * Returns the `LCandidateEnum.toString()` method that overrides
   * `Enum.toString()`. Return `nullptr` if `Enum.toString()` is not overridden.
   *
   * Results, including the "not overridden" case stored as nullptr, are
   * memoized in a function-local static map keyed by class.
   * NOTE(review): count()/at()/insert() are separate calls, so two threads
   * may both scan the same class before either inserts; presumably harmless
   * because both would compute the same value -- confirm.
   */
  DexMethod* get_user_defined_tostring_method(DexClass* cls) {
    static ConcurrentMap<DexClass*, DexMethod*> cache;
    if (cache.count(cls)) {
      return cache.at(cls);
    }
    // Only the class's own virtual methods are inspected.
    for (auto vmethod : cls->get_vmethods()) {
      if (method::signatures_match(vmethod, ENUM_TOSTRING_METHOD)) {
        cache.insert(std::make_pair(cls, vmethod));
        return vmethod;
      }
    }
    cache.insert(std::make_pair(cls, nullptr));
    return nullptr;
  }

 private:
  /**
   * Create a helper class for enums.
   *
   * The class gets a $VALUES array field, `fields_count` Integer fields, a
   * <clinit> that initializes them, and a values(I) method.
   */
  DexClass* make_enumutils_class(uint32_t fields_count) {
    // Note that the EnumUtilsFieldAnalyzer does pattern matching on fields of
    // the form $EnumUtils.fXXX, and should be kept in sync.
std::string name = "Lredex/$EnumUtils;"; DexType* type = DexType::get_type(name); while (type) { name.insert(name.size() - 1, "$u"); type = DexType::get_type(name); } type = DexType::make_type(name.c_str()); ClassCreator cc(type); cc.set_access(ACC_PUBLIC | ACC_FINAL); cc.set_super(type::java_lang_Object()); DexClass* cls = cc.create(); cls->rstate.set_generated(); cls->rstate.set_clinit_has_no_side_effects(); auto values_field = make_values_field(cls); auto clinit_method = make_clinit_method(cls, fields_count); auto clinit_code = clinit_method->get_code(); m_fields.reserve(fields_count); for (uint32_t i = 0; i < fields_count; ++i) { m_fields.push_back(make_a_field(cls, i, clinit_code)); } clinit_code->push_back(dasm(OPCODE_SPUT_OBJECT, values_field, {2_v})); clinit_code->push_back(dasm(OPCODE_RETURN_VOID)); m_values_method_ref = make_values_method(cls, values_field, fields_count); return cls; } /** * LEnumUtils;.$VALUES:[Ljava/lang/Integer; */ DexFieldRef* make_values_field(DexClass* cls) { auto name = DexString::make_string("$VALUES"); auto field = DexField::make_field(cls->get_type(), name, type::make_array_type(INTEGER_TYPE)) ->make_concrete(ACC_PRIVATE | ACC_FINAL | ACC_STATIC); cls->add_field(field); field->set_deobfuscated_name(show_deobfuscated(field)); return (DexFieldRef*)field; } /** * Create a static final Integer field and update <clinit> code. */ DexFieldRef* make_a_field(DexClass* cls, uint32_t value, IRCode* code) { // Note that the EnumUtilsFieldAnalyzer does pattern matching on fields of // the form $EnumUtils.fXXX, and should be kept in sync. 
auto name = DexString::make_string("f" + std::to_string(value)); auto field = DexField::make_field(cls->get_type(), name, INTEGER_TYPE) ->make_concrete(ACC_PUBLIC | ACC_FINAL | ACC_STATIC); cls->add_field(field); field->set_deobfuscated_name(show_deobfuscated(field)); code->push_back(dasm(OPCODE_CONST, {1_v, {LITERAL, value}})); code->push_back(dasm(OPCODE_INVOKE_STATIC, INTEGER_VALUEOF_METHOD, {1_v})); code->push_back(dasm(OPCODE_MOVE_RESULT_OBJECT, {0_v})); code->push_back(dasm(OPCODE_SPUT_OBJECT, field, {0_v})); code->push_back(dasm(OPCODE_APUT_OBJECT, {0_v, 2_v, 1_v})); return (DexFieldRef*)field; } /** * Make <clinit> method. */ DexMethod* make_clinit_method(DexClass* cls, uint32_t fields_count) { auto proto = DexProto::make_proto(type::_void(), DexTypeList::make_type_list({})); DexMethod* method = DexMethod::make_method(cls->get_type(), CLINIT_METHOD_STR, proto) ->make_concrete(ACC_STATIC | ACC_CONSTRUCTOR, false); method->set_code(std::make_unique<IRCode>()); cls->add_method(method); method->set_deobfuscated_name(show_deobfuscated(method)); auto code = method->get_code(); // const v2, xx // new-array v2, v2, [Integer code->push_back(dasm(OPCODE_CONST, {2_v, {LITERAL, fields_count}})); code->push_back( dasm(OPCODE_NEW_ARRAY, type::make_array_type(INTEGER_TYPE), {2_v})); code->push_back(dasm(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT, {2_v})); code->set_registers_size(3); return method; } /** * LEnumUtils;.values:(I)[Ljava/lang/Integer; * * We construct an array field at class loading time, which stores some of the * integers. Copy part of the array if the required integers are in the array, * otherwise copy all of them and construct more. The following comments are * the basic blocks of this method. 
* * res = new Integer[count] * if count <= VALUES.length * : small_argument_block * copy_size = count * goto :copy_array_block * else * : large_argument_block * copy_size = VALUES.length * id = copy_size * goto :integers_block * : integers_block * if id < count * : one_integer_block * res[id] = Integer.valueOf(id) * id = id + 1 * goto :integers_block * else * goto :copy_array_block * : copy_array_block * System.arraycopy(VALUES, 0, res, 0, copy_size); * return res */ DexMethodRef* make_values_method(DexClass* cls, DexFieldRef* values_field, uint32_t total_integer_fields) { auto name = DexString::make_string("values"); auto integer_array_type = type::make_array_type(INTEGER_TYPE); DexProto* proto = DexProto::make_proto( integer_array_type, DexTypeList::make_type_list({type::_int()})); DexMethod* method = DexMethod::make_method(cls->get_type(), name, proto) ->make_concrete(ACC_PUBLIC | ACC_STATIC, false); method->set_code(std::make_unique<IRCode>(method, 0)); cls->add_method(method); method->set_deobfuscated_name(show_deobfuscated(method)); auto code = method->get_code(); code->build_cfg(); auto& cfg = code->cfg(); auto entry = cfg.entry_block(); auto small_argument_block = cfg.create_block(); auto large_argument_block = cfg.create_block(); auto one_integer_block = cfg.create_block(); auto integers_block = cfg.create_block(); auto copy_array_block = cfg.create_block(); cfg.add_edge(small_argument_block, copy_array_block, cfg::EDGE_GOTO); cfg.add_edge(large_argument_block, integers_block, cfg::EDGE_GOTO); cfg.add_edge(one_integer_block, integers_block, cfg::EDGE_GOTO); entry->push_back( {dasm(OPCODE_NEW_ARRAY, integer_array_type, {0_v}), dasm(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT, {1_v}), dasm(OPCODE_CONST, {2_v, {LITERAL, total_integer_fields}})}); cfg.create_branch(entry, dasm(OPCODE_IF_LE, {0_v, 2_v}), large_argument_block, small_argument_block); small_argument_block->push_back(dasm(OPCODE_MOVE, {4_v, 0_v})); large_argument_block->push_back( {dasm(OPCODE_MOVE, {4_v, 
2_v}), dasm(OPCODE_MOVE, {5_v, 2_v})}); cfg.create_branch(integers_block, dasm(OPCODE_IF_LT, {5_v, 0_v}), copy_array_block, one_integer_block); one_integer_block->push_back( {dasm(OPCODE_INVOKE_STATIC, INTEGER_VALUEOF_METHOD, {5_v}), dasm(OPCODE_MOVE_RESULT_OBJECT, {6_v}), dasm(OPCODE_APUT_OBJECT, {6_v, 1_v, 5_v}), dasm(OPCODE_ADD_INT_LIT8, {5_v, 5_v, 1_L})}); auto copy_array_method = DexMethod::make_method( "Ljava/lang/System;.arraycopy:(Ljava/lang/Object;ILjava/lang/" "Object;II)V"); copy_array_block->push_back({dasm(OPCODE_SGET_OBJECT, values_field), dasm(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT, {7_v}), dasm(OPCODE_CONST, {8_v, 0_L}), dasm(OPCODE_INVOKE_STATIC, copy_array_method, {7_v, 8_v, 1_v, 8_v, 4_v}), dasm(OPCODE_RETURN_OBJECT, {1_v})}); cfg.recompute_registers_size(); code->clear_cfg(); return (DexMethodRef*)method; } }; struct InsnReplacement { cfg::InstructionIterator original_insn; std::vector<IRInstruction*> replacements; InsnReplacement(cfg::ControlFlowGraph& cfg, cfg::Block* block, MethodItemEntry* mie, IRInstruction* new_insn) : original_insn(block->to_cfg_instruction_iterator(*mie)), replacements{new_insn} { push_back_move_result(cfg, new_insn); } InsnReplacement(cfg::ControlFlowGraph& cfg, cfg::Block* block, MethodItemEntry* mie, std::vector<IRInstruction*>& new_insns) : original_insn(block->to_cfg_instruction_iterator(*mie)), replacements(std::move(new_insns)) { if (!replacements.empty()) { auto new_insn = *replacements.rbegin(); push_back_move_result(cfg, new_insn); } } private: /** * If the original instruction was paired with a `move-result`, create a new * one with the same destination register (and possibly the same opcode) * because the original one will be removed. 
*/ void push_back_move_result(cfg::ControlFlowGraph& cfg, IRInstruction* new_insn) { auto org_move_insn_it = cfg.move_result_of(original_insn); if (!org_move_insn_it.is_end()) { auto& org_move_insn = org_move_insn_it.unwrap()->insn; auto& org_insn = original_insn.unwrap()->insn; auto org_op = org_move_insn->opcode(); auto dest = org_move_insn->dest(); IROpcode new_op = org_op; if (org_insn->has_move_result() && new_insn->has_move_result_pseudo()) { new_op = opcode::move_result_to_pseudo(org_op); } else if (org_insn->has_move_result_pseudo() && new_insn->has_move_result()) { new_op = opcode::pseudo_to_move_result(org_op); } replacements.push_back(dasm(new_op, {{VREG, dest}})); } } }; /** * Code transformation for a method. */ class CodeTransformer final { public: CodeTransformer(const EnumAttributeMap& m_enum_attributes_map, EnumUtil* enum_util, DexMethod* method) : m_enum_attributes_map(m_enum_attributes_map), m_enum_util(enum_util), m_method(method) {} void run() { optimize_enums::EnumTypeEnvironment start_env = optimize_enums::EnumFixpointIterator::gen_env(m_method); auto* code = m_method->get_code(); code->build_cfg(); auto& cfg = code->cfg(); optimize_enums::EnumFixpointIterator engine(cfg, m_enum_util->m_config); engine.run(start_env); for (auto& block : cfg.blocks()) { optimize_enums::EnumTypeEnvironment env = engine.get_entry_state_at(block); for (auto it = block->begin(); it != block->end(); ++it) { if (it->type == MFLOW_OPCODE) { engine.analyze_instruction(it->insn, &env); update_instructions(env, cfg, block, &(*it)); } } } // We could not insert invoke-kind instructions to editable cfg when we // iterate the cfg. If we're inside a try region, inserting invoke-kind will // split the block and insert a move-result in the new goto successor block, // thus invalidating iterators into the CFG. See the comment on the // insertion methods in ControlFlow.h for more details. 
for (const auto& info : m_replacements) {
      cfg.replace_insns(info.original_insn, info.replacements);
    }
    code->clear_cfg();
  }

 private:
  /**
   * Dispatch on the opcode of the instruction in `mie` and either rewrite it
   * in place (type operands) or record a replacement in m_replacements.
   * `env` is the type environment after the instruction was analyzed.
   */
  void update_instructions(const optimize_enums::EnumTypeEnvironment& env,
                           cfg::ControlFlowGraph& cfg,
                           cfg::Block* block,
                           MethodItemEntry* mie) {
    auto insn = mie->insn;
    switch (insn->opcode()) {
    case OPCODE_SGET_OBJECT:
      update_sget_object(env, cfg, block, mie);
      break;
    case OPCODE_IGET:
    case OPCODE_IGET_WIDE:
    case OPCODE_IGET_OBJECT:
    case OPCODE_IGET_BOOLEAN:
    case OPCODE_IGET_BYTE:
    case OPCODE_IGET_CHAR:
    case OPCODE_IGET_SHORT:
      update_iget(cfg, block, mie);
      break;
    case OPCODE_INVOKE_VIRTUAL: {
      auto method = insn->get_method();
      // ordinal/equals/compareTo map onto Integer methods, name/hashCode onto
      // generated helpers; anything else is treated as a user method.
      if (method::signatures_match(method, m_enum_util->ENUM_ORDINAL_METHOD)) {
        update_invoke_virtual(env, cfg, block, mie,
                              m_enum_util->INTEGER_INTVALUE_METHOD);
      } else if (method::signatures_match(method,
                                          m_enum_util->ENUM_EQUALS_METHOD)) {
        update_invoke_virtual(env, cfg, block, mie,
                              m_enum_util->INTEGER_EQUALS_METHOD);
      } else if (method::signatures_match(method,
                                          m_enum_util->ENUM_COMPARETO_METHOD)) {
        update_invoke_virtual(env, cfg, block, mie,
                              m_enum_util->INTEGER_COMPARETO_METHOD);
      } else if (method::signatures_match(method,
                                          m_enum_util->ENUM_NAME_METHOD)) {
        update_invoke_name(env, cfg, block, mie);
      } else if (method::signatures_match(method,
                                          m_enum_util->ENUM_HASHCODE_METHOD)) {
        update_invoke_hashcode(env, cfg, block, mie);
      } else if (method == m_enum_util->STRINGBUILDER_APPEND_OBJ_METHOD) {
        update_invoke_stringbuilder_append(env, cfg, block, mie);
      } else {
        update_invoke_user_method(env, cfg, block, mie);
      }
    } break;
    case OPCODE_INVOKE_DIRECT: {
      auto method = insn->get_method();
      // Constructor calls are left untouched here.
      if (!method::is_init(method)) {
        update_invoke_user_method(env, cfg, block, mie);
      }
    } break;
    case OPCODE_INVOKE_STATIC: {
      auto method = insn->get_method();
      if (method == m_enum_util->STRING_VALUEOF_METHOD) {
        update_invoke_string_valueof(env, cfg, block, mie);
      } else if (is_enum_values(method)) {
        update_invoke_values(env, cfg, block, mie);
      } else if (is_enum_valueof(method)) {
        update_invoke_valueof(env, cfg, block, mie);
      }
    } break;
    case OPCODE_NEW_ARRAY: {
      // Arrays of candidate enums become Integer arrays of the same depth.
      auto array_type = insn->get_type();
      auto new_type = try_convert_to_int_type(array_type);
      if (new_type) {
        insn->set_type(new_type);
      }
    } break;
    case OPCODE_CHECK_CAST: {
      auto type = insn->get_type();
      auto new_type = try_convert_to_int_type(type);
      if (new_type) {
        // Cast to a candidate enum (or array of it): the source, if its type
        // is known, must be exactly that type.
        auto possible_src_types = env.get(insn->src(0));
        if (possible_src_types.size() != 0) {
          DexType* candidate_type =
              extract_candidate_enum_type(possible_src_types);
          always_assert(candidate_type == type);
        }
        // Empty src_types means the src register holds null object.
        insn->set_type(new_type);
      } else if (type == m_enum_util->ENUM_TYPE) {
        // A cast to java.lang.Enum must not be applied to a candidate enum.
        always_assert(!extract_candidate_enum_type(env.get(insn->src(0))));
      }
    } break;
    default: {
      // Any other instruction carrying a type operand must not refer to a
      // candidate enum; init-class is exempt from this check.
      if (insn->has_type() && insn->opcode() != IOPCODE_INIT_CLASS) {
        auto type = insn->get_type();
        always_assert_log(try_convert_to_int_type(type) == nullptr,
                          "Unhandled type in %s method %s\n", SHOW(insn),
                          SHOW(m_method));
      }
    } break;
    }
  }

  /**
   * If the field is a candidate enum field,
   *   sget-object LCandidateEnum;.f:LCandidateEnum; =>
   *     sget-object LEnumUtils;.f?:Integer
   * or
   *   const v_ordinal #??
* invoke-static v_ordinal Integer.valueOf:(I)Integer */ void update_sget_object(const optimize_enums::EnumTypeEnvironment& env, cfg::ControlFlowGraph& cfg, cfg::Block* block, MethodItemEntry* mie) { auto insn = mie->insn; auto field = insn->get_field(); if (!m_enum_attributes_map.count(field->get_type())) { return; } auto& constants = m_enum_attributes_map.at(field->get_type()).m_constants_map; if (!constants.count(field)) { return; } uint32_t ordinal = constants.at(field).ordinal; if (ordinal < m_enum_util->m_config.max_enum_size) { auto new_field = m_enum_util->m_fields.at(constants.at(field).ordinal); auto new_insn = dasm(OPCODE_SGET_OBJECT, new_field); m_replacements.push_back(InsnReplacement(cfg, block, mie, new_insn)); } else { always_assert( m_enum_util->m_config.breaking_reference_equality_allowlist.count( field->get_type())); auto ordinal_reg = allocate_temp(); std::vector<IRInstruction*> new_insns; new_insns.push_back( dasm(OPCODE_CONST, {{VREG, ordinal_reg}, {LITERAL, ordinal}})); new_insns.push_back(dasm(OPCODE_INVOKE_STATIC, m_enum_util->INTEGER_VALUEOF_METHOD, {{VREG, ordinal_reg}})); m_replacements.push_back(InsnReplacement(cfg, block, mie, new_insns)); } } /** * If the instance field belongs to a CandidateEnum, replace the `iget` * instruction with a static call to the correct method. * * iget(-object|-wide)? vObj LCandidateEnum;.iField:Ltype; * move-result-pseudo vDest * => * invoke-static {vObj}, LCandidateEnum;.redex$OE$get_iField:(Integer;)Ltype; * move-result(-object|-wide)? 
vDest
   */
  void update_iget(cfg::ControlFlowGraph& cfg,
                   cfg::Block* block,
                   MethodItemEntry* mie) {
    auto insn = mie->insn;
    auto ifield = insn->get_field();
    auto enum_type = ifield->get_class();
    // Only fields declared on a candidate enum are rewritten.
    if (!m_enum_attributes_map.count(enum_type)) {
      return;
    }
    auto vObj = insn->src(0);
    auto get_ifield_method =
        m_enum_util->add_get_ifield_method(enum_type, ifield);
    m_replacements.push_back(InsnReplacement(
        cfg, block, mie,
        dasm(OPCODE_INVOKE_STATIC, get_ifield_method, {{VREG, vObj}})));
  }

  /**
   * If LCandidateEnum; is a candidate enum class,
   * invoke-static LCandidateEnum;.values:()[LCandidateEnum; =>
   *   const vn, xxx
   *   invoke-static vn LEnumUtils;.values:(I)[Integer
   *
   * `xxx` is the number of constants of the enum, which must be non-zero.
   */
  void update_invoke_values(const optimize_enums::EnumTypeEnvironment&,
                            cfg::ControlFlowGraph& cfg,
                            cfg::Block* block,
                            MethodItemEntry* mie) {
    auto insn = mie->insn;
    auto method = insn->get_method();
    auto container = method->get_class();
    auto attributes_it = m_enum_attributes_map.find(container);
    if (attributes_it != m_enum_attributes_map.end()) {
      auto reg = allocate_temp();
      uint64_t enum_size = attributes_it->second.m_constants_map.size();
      always_assert(enum_size);
      std::vector<IRInstruction*> new_insns;
      new_insns.push_back(
          dasm(OPCODE_CONST,
               {{VREG, reg}, {LITERAL, static_cast<int64_t>(enum_size)}}));
      new_insns.push_back(dasm(OPCODE_INVOKE_STATIC,
                               m_enum_util->m_values_method_ref,
                               {{VREG, reg}}));
      m_replacements.push_back(InsnReplacement(cfg, block, mie, new_insns));
    }
  }

  /**
   * If LCandidateEnum; is a candidate enum class,
   * invoke-static v0 LCandidateEnum;.valueOf:(String)LCandidateEnum; =>
   *   invoke-static v0 LCandidateEnum;.redex$OE$valueOf:(String)Integer
   */
  void update_invoke_valueof(const optimize_enums::EnumTypeEnvironment&,
                             cfg::ControlFlowGraph& cfg,
                             cfg::Block* block,
                             MethodItemEntry* mie) {
    auto insn = mie->insn;
    auto container = insn->get_method()->get_class();
    if (!m_enum_attributes_map.count(container)) {
      return;
    }
    // Requesting the substitute also registers it for later generation.
    auto valueof_method = m_enum_util->add_substitute_of_valueof(container);
    auto reg = insn->src(0);
    m_replacements.push_back(InsnReplacement(
        cfg, block, mie,
        dasm(OPCODE_INVOKE_STATIC, valueof_method, {{VREG, reg}})));
  }

  /**
   * If v0 is a candidate enum,
   * invoke-virtual v0 LCandidateEnum;.name:()Ljava/lang/String; or
   * invoke-virtual v0 LCandidateEnum;.toString:()Ljava/lang/String; =>
   *    invoke-static v0 LCandidateEnum;.redex$OE$name:(Integer)String
   */
  void update_invoke_name(const optimize_enums::EnumTypeEnvironment& env,
                          cfg::ControlFlowGraph& cfg,
                          cfg::Block* block,
                          MethodItemEntry* mie) {
    auto insn = mie->insn;
    auto container = insn->get_method()->get_class();
    auto reg = insn->src(0);
    // The receiver's analyzed types decide which enum (if any) is involved.
    auto candidate_type = infer_candidate_type(env.get(reg), container);
    if (!candidate_type) {
      return;
    }
    auto helper_method = m_enum_util->add_substitute_of_name(candidate_type);
    m_replacements.push_back(InsnReplacement(
        cfg, block, mie,
        dasm(OPCODE_INVOKE_STATIC, helper_method, {{VREG, reg}})));
  }

  /**
   * If v0 is a candidate enum,
   * invoke-virtual v0 LCandidateEnum;.hashCode:()I =>
   *    invoke-static v0 LCandidateEnum;.redex$OE$hashCode:(Integer)I
   */
  void update_invoke_hashcode(const optimize_enums::EnumTypeEnvironment& env,
                              cfg::ControlFlowGraph& cfg,
                              cfg::Block* block,
                              MethodItemEntry* mie) {
    auto insn = mie->insn;
    auto container = insn->get_method()->get_class();
    auto src_reg = insn->src(0);
    auto candidate_type = infer_candidate_type(env.get(src_reg), container);
    if (!candidate_type) {
      return;
    }
    auto helper_method =
        m_enum_util->add_substitute_of_hashcode(candidate_type);
    m_replacements.push_back(InsnReplacement(
        cfg, block, mie,
        dasm(OPCODE_INVOKE_STATIC, helper_method, {{VREG, src_reg}})));
  }

  /**
   * If v0 is a candidate enum object,
   * invoke-static v0 LString;.valueOf:(LObject;)LString;
   * =>
   *   invoke-static v0 LCandidateEnum;.redex$OE$String_valueOf:(Integer)String
   */
  void update_invoke_string_valueof(
      const optimize_enums::EnumTypeEnvironment& env,
      cfg::ControlFlowGraph& cfg,
      cfg::Block* block,
      MethodItemEntry* mie) {
    auto insn = mie->insn;
    DexType* candidate_type =
        extract_candidate_enum_type(env.get(insn->src(0)));
    if (candidate_type == nullptr) {
      return;
    }
    DexMethodRef* string_valueof_meth =
        m_enum_util->add_substitute_of_stringvalueof(candidate_type);
    m_replacements.push_back(
        InsnReplacement(cfg, block, mie,
                        dasm(OPCODE_INVOKE_STATIC, string_valueof_meth,
                             {{VREG, insn->src(0)}})));
  }

  /**
   * If v1 is a candidate enum,
   * invoke-virtual v0 v1 LStringBuilder;.append(Object):LStringBuilder;
   * =>
   *   invoke-static v1 LCandidateEnum;.redex$OE$String_valueOf:(Integer)String
   *   move-result-object vn
   *   invoke-virtual v0 vn LStringBuilder;.append:(String)LStringBuilder;
   */
  void update_invoke_stringbuilder_append(
      const optimize_enums::EnumTypeEnvironment& env,
      cfg::ControlFlowGraph& cfg,
      cfg::Block* block,
      MethodItemEntry* mie) {
    auto insn = mie->insn;
    // src(1) is the appended argument; src(0) is the StringBuilder.
    DexType* candidate_type =
        extract_candidate_enum_type(env.get(insn->src(1)));
    if (candidate_type == nullptr) {
      return;
    }
    DexMethodRef* string_valueof_meth =
        m_enum_util->add_substitute_of_stringvalueof(candidate_type);
    auto reg0 = insn->src(0);
    auto reg1 = insn->src(1);
    auto str_reg = allocate_temp();
    std::vector<IRInstruction*> new_insns{
        dasm(OPCODE_INVOKE_STATIC, string_valueof_meth, {{VREG, reg1}}),
        dasm(OPCODE_MOVE_RESULT_OBJECT, {{VREG, str_reg}}),
        dasm(OPCODE_INVOKE_VIRTUAL,
             m_enum_util->STRINGBUILDER_APPEND_STR_METHOD,
             {{VREG, reg0}, {VREG, str_reg}})};
    m_replacements.push_back(InsnReplacement(cfg, block, mie, new_insns));
  }

  /**
   * If v0 is a candidate enum,
   * invoke-virtual v0 LCandidateEnum;.ordinal:()I =>
   * invoke-virtual v0 Integer.intValue()I,
   *
   * invoke-virtual v0, v1 LCandidateEnum;.equals:(Ljava/lang/Object;)Z =>
   * invoke-virtual v0, v1 Integer.equals(Ljava/lang/Object;)Z,
   *
   * invoke-virtual v0, v1 LCandidateEnum;.compareTo:(Ljava/lang/Object;)I =>
   * invoke-virtual v0, v1 Integer.compareTo(Ljava/lang/Integer;)I
   *
   * `integer_meth` is the Integer method to call; the source registers are
   * carried over unchanged.
   */
  void update_invoke_virtual(const optimize_enums::EnumTypeEnvironment& env,
                             cfg::ControlFlowGraph& cfg,
                             cfg::Block* block,
                             MethodItemEntry* mie,
                             DexMethodRef* integer_meth) {
    auto insn = mie->insn;
    auto container = insn->get_method()->get_class();
    auto src_reg = insn->src(0);
    auto candidate_type = infer_candidate_type(env.get(src_reg), container);
    if (!candidate_type) {
      return;
    }
    auto new_insn = new IRInstruction(OPCODE_INVOKE_VIRTUAL);
    new_insn->set_method(integer_meth)->set_srcs_size(insn->srcs_size());
    for (size_t id = 0; id < insn->srcs_size(); ++id) {
      new_insn->set_src(id, insn->src(id));
    }
    m_replacements.push_back(InsnReplacement(cfg, block, mie, new_insn));
  }

  /**
   * If this is an invocation of a user-defined virtual or direct method on a
   * CandidateEnum, then we make that method static. If that method is
   * toString(), then we call one of the appropriate methods Enum.name()
   * or CandidateEnum.toString(). Otherwise we do nothing.
   */
  void update_invoke_user_method(const optimize_enums::EnumTypeEnvironment& env,
                                 cfg::ControlFlowGraph& cfg,
                                 cfg::Block* block,
                                 MethodItemEntry* mie) {
    auto insn = mie->insn;
    auto method_ref = insn->get_method();
    auto container_type = method_ref->get_class();
    auto candidate_type =
        infer_candidate_type(env.get(insn->src(0)), container_type);
    if (!candidate_type) {
      return;
    }
    // If this is toString() and there is no CandidateEnum.toString(), then we
    // call Enum.name() instead.
    if (method::signatures_match(method_ref,
                                 m_enum_util->ENUM_TOSTRING_METHOD) &&
        m_enum_util->get_user_defined_tostring_method(
            type_class(candidate_type)) == nullptr) {
      update_invoke_name(env, cfg, block, mie);
    } else {
      auto method = resolve_method(method_ref, opcode_to_search(insn));
      always_assert(method);
      // Same operands as the original invoke, now an invoke-static on the
      // resolved method.
      auto new_insn = (new IRInstruction(*insn))
                          ->set_opcode(OPCODE_INVOKE_STATIC)
                          ->set_method(method);
      m_replacements.push_back(InsnReplacement(cfg, block, mie, new_insn));
    }
  }

  /**
   * Infer a candidate type from an instruction like
   * `invoke-virtual vReg, Target.method()`
   *
   * Return a candidate type if we can get only one, return null if all these
   * types are not related to our candidate types.
Bail out if the type are * mixed (our analysis part should have excluded this case). */ DexType* infer_candidate_type(const EnumTypes& reg_types, DexType* target_type) { DexType* candidate_type = nullptr; if (is_a_candidate(target_type)) { candidate_type = target_type; } else if (!m_enum_util->is_super_type_of_candidate_enum(target_type)) { return nullptr; } auto type_set = reg_types.elements(); if (type_set.empty()) { // Register holds null value, we infer the type in instruction. return candidate_type; } else if (candidate_type) { always_assert_log(type_set.size() == 1 && *type_set.begin() == candidate_type, "%s != %s", SHOW(type_set), SHOW(candidate_type)); return candidate_type; } else if (type_set.size() == 1) { candidate_type = *type_set.begin(); return is_a_candidate(candidate_type) ? candidate_type : nullptr; } else { for (auto t : type_set) { always_assert_log(!is_a_candidate(t), "%s\n", SHOW(t)); } return nullptr; } } /** * Return nullptr if the types contain none of the candidate enums, * return the candidate type if types only contain one candidate enum and do * not contain other types, * or assertion failure when the types are mixed. */ DexType* extract_candidate_enum_type(const EnumTypes& types) { return infer_candidate_type(types, m_enum_util->OBJECT_TYPE); } DexType* try_convert_to_int_type(DexType* type) { return m_enum_util->try_convert_to_int_type(m_enum_attributes_map, type); } bool is_a_candidate(DexType* type) const { auto elem_type = const_cast<DexType*>(type::get_element_type_if_array(type)); return m_enum_attributes_map.count(elem_type); } inline reg_t allocate_temp() { return m_method->get_code()->cfg().allocate_temp(); } const EnumAttributeMap& m_enum_attributes_map; EnumUtil* m_enum_util; DexMethod* m_method; std::vector<InsnReplacement> m_replacements; }; /** * Transform enum usages in the stores. */ class EnumTransformer final { public: /** * EnumTransformer constructor. Analyze <clinit> of candidate enums. 
*/ EnumTransformer(const Config& config, DexStoresVector* stores) : m_stores(*stores), m_int_objs(0) { m_enum_util = std::make_unique<EnumUtil>(config); for (auto it = config.candidate_enums.begin(); it != config.candidate_enums.end(); ++it) { auto enum_cls = type_class(*it); auto attributes = optimize_enums::analyze_enum_clinit(enum_cls); size_t num_enum_constants = attributes.m_constants_map.size(); if (num_enum_constants == 0) { TRACE(ENUM, 2, "\tCannot analyze enum %s : ord %lu sfields %lu", SHOW(enum_cls), num_enum_constants, enum_cls->get_sfields().size()); continue; } else if (num_enum_constants > config.max_enum_size) { if (!config.breaking_reference_equality_allowlist.count(*it)) { TRACE(ENUM, 2, "\tSkip %s %lu values", SHOW(enum_cls), num_enum_constants); continue; } else { TRACE(ENUM, 2, "\tOptimimze %s (%lu values) but object equality is not " "guaranteed", SHOW(enum_cls), num_enum_constants); } } m_int_objs = std::max<uint32_t>(m_int_objs, num_enum_constants); m_enum_objs += num_enum_constants; m_enum_attributes_map.emplace(*it, attributes); clean_generated_methods_fields(enum_cls); opt_metadata::log_opt(ENUM_OPTIMIZED, enum_cls); } m_enum_util->create_util_class(stores, m_int_objs); } EnumTransformer(const EnumTransformer&) = delete; void run() { auto scope = build_class_scope(m_stores); // Update all the instructions. 
walk::parallel::code( scope, [&](DexMethod* method) { if (m_enum_attributes_map.count(method->get_class()) && is_generated_enum_method(method)) { return false; } std::vector<DexType*> types; method->gather_types(types); return std::any_of(types.begin(), types.end(), [this](DexType* type) { return (bool)try_convert_to_int_type(type); }); }, [&](DexMethod* method, IRCode& code) { if (m_enum_attributes_map.count(method->get_class()) && (!is_constructor(method) && !is_static(method))) { m_enum_util->m_instance_methods.insert(method); } CodeTransformer code_updater(m_enum_attributes_map, m_enum_util.get(), method); code_updater.run(); }); create_substitute_methods(m_enum_util->m_substitute_methods); std::vector<DexMethod*> instance_methods( m_enum_util->m_instance_methods.begin(), m_enum_util->m_instance_methods.end()); std::sort(instance_methods.begin(), instance_methods.end(), dexmethods_comparator()); for (auto method : instance_methods) { mutators::make_static(method); } std::map<DexFieldRef*, DexMethodRef*, dexfields_comparator> field_to_method( m_enum_util->m_get_instance_field_methods.begin(), m_enum_util->m_get_instance_field_methods.end()); for (auto& pair : field_to_method) { create_get_instance_field_method(pair.second, pair.first); } post_update_enum_classes(scope); // Update all methods and fields references by replacing the candidate enum // types with Integer type. std::unordered_map<DexType*, DexType*> type_mapping; for (auto& pair : m_enum_attributes_map) { type_mapping[pair.first] = m_enum_util->INTEGER_TYPE; } type_reference::TypeRefUpdater updater(type_mapping); updater.update_methods_fields(scope); sanity_check(scope); } uint32_t get_int_objs_count() { return m_int_objs; } uint32_t get_enum_objs_count() { return m_enum_objs; } private: /** * Go through all instructions and check that all the methods, fields, and * types they reference actually exist. 
*/ void sanity_check(Scope& scope) { walk::parallel::code(scope, [this](DexMethod* method, IRCode& code) { for (auto& mie : InstructionIterable(code)) { auto insn = mie.insn; if (insn->has_method()) { auto method_ref = insn->get_method(); auto container = method_ref->get_class(); if (m_enum_attributes_map.count(container)) { always_assert_log(method_ref->is_def(), "Invalid insn %s in %s\n", SHOW(insn), SHOW(method)); } } else if (insn->has_field()) { auto field_ref = insn->get_field(); auto container = field_ref->get_class(); if (m_enum_attributes_map.count(container)) { always_assert_log(field_ref->is_def(), "Invalid insn %s in %s\n", SHOW(insn), SHOW(method)); } } else if (insn->has_type() && insn->opcode() != IOPCODE_INIT_CLASS) { auto type_ref = insn->get_type(); always_assert_log(!try_convert_to_int_type(type_ref), "Invalid insn %s in %s\n", SHOW(insn), SHOW(method)); } } }); } void create_substitute_methods(const ConcurrentSet<DexMethodRef*>& methods) { for (auto ref : methods) { if (ref->get_name() == m_enum_util->REDEX_NAME) { create_name_method(ref); } else if (ref->get_name() == m_enum_util->REDEX_HASHCODE) { create_hashcode_method(ref); } else if (ref->get_name() == m_enum_util->REDEX_VALUEOF) { create_valueof_method(ref); } else if (ref->get_name() == m_enum_util->REDEX_STRING_VALUEOF) { create_stringvalueof_method(ref); } } } /** * Substitute for String.valueOf(Object obj). 
*
   * public static String redex$OE$String_valueOf(Integer obj) {
   *   if (obj == null) {
   *     return "null";
   *   }
   *   return CandidateEnum.toString(obj);
   * }
   */
  void create_stringvalueof_method(DexMethodRef* ref) {
    MethodCreator creator(ref, ACC_STATIC | ACC_PUBLIC);
    auto method = creator.create();
    type_class(ref->get_class())->add_method(method);

    auto code = method->get_code();
    code->build_cfg();
    auto& cfg = code->cfg();
    auto entry = cfg.entry_block();

    // Block returning the literal "null".
    auto return_null_block = cfg.create_block();
    return_null_block->push_back(
        {dasm(OPCODE_CONST_STRING, DexString::make_string("null")),
         dasm(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT, {1_v}),
         dasm(OPCODE_RETURN_OBJECT, {1_v})});

    // Block delegating to the enum's toString substitute.
    auto obj_tostring_block = cfg.create_block();
    auto tostring_meth =
        m_enum_util->get_substitute_of_tostring(ref->get_class());
    obj_tostring_block->push_back(
        {dasm(OPCODE_INVOKE_STATIC, tostring_meth, {0_v}),
         dasm(OPCODE_MOVE_RESULT_OBJECT, {1_v}),
         dasm(OPCODE_RETURN_OBJECT, {1_v})});

    // Null test on the argument selects between the two blocks.
    cfg.create_branch(entry, dasm(OPCODE_IF_EQZ, {0_v}), obj_tostring_block,
                      return_null_block);
    cfg.recompute_registers_size();
    code->clear_cfg();
  }

  /**
   * Substitute for LCandidateEnum;.valueOf(String s)
   *
   * public static Integer redex$OE$valueOf(String s) {
   *   if (s == "xxx") {
   *     return f0;
   *   } else if (s == "y") {
   *     return f1;
   *   } ...
   *   } else {
   *     throw new IllegalArgumentException(s);
   *   }
   * }
   *
   * Note that the string of the exception is shortened.
*/
  void create_valueof_method(DexMethodRef* ref) {
    MethodCreator creator(ref, ACC_STATIC | ACC_PUBLIC);
    auto method = creator.create();
    type_class(ref->get_class())->add_method(method);

    auto code = method->get_code();
    code->build_cfg();
    auto& cfg = code->cfg();

    // Chain one String.equals test per enum constant, starting at the entry
    // block; `chain_tail` is always the block the next test is appended to.
    auto chain_tail = cfg.entry_block();
    auto&& ordered_names =
        m_enum_attributes_map[ref->get_class()].get_ordered_names();
    for (auto& ordinal_and_name : ordered_names) {
      chain_tail->push_back(
          {dasm(OPCODE_CONST_STRING, ordinal_and_name.second),
           dasm(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT, {1_v}),
           dasm(OPCODE_INVOKE_VIRTUAL, m_enum_util->STRING_EQ_METHOD,
                {0_v, 1_v}),
           dasm(OPCODE_MOVE_RESULT, {3_v})});

      // Match: return the Integer field indexed by this ordinal.
      auto equal_block = cfg.create_block();
      auto obj_field = m_enum_util->m_fields[ordinal_and_name.first];
      equal_block->push_back({dasm(OPCODE_SGET_OBJECT, obj_field),
                              dasm(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT, {2_v}),
                              dasm(OPCODE_RETURN_OBJECT, {2_v})});

      // No match: continue with the next constant.
      auto ne_block = cfg.create_block();
      cfg.create_branch(chain_tail, dasm(OPCODE_IF_EQZ, {3_v}), equal_block,
                        ne_block);
      chain_tail = ne_block;
    }

    // No name matched: throw new IllegalArgumentException(s).
    chain_tail->push_back(
        {dasm(OPCODE_NEW_INSTANCE, m_enum_util->ILLEGAL_ARG_EXCP_TYPE, {}),
         dasm(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT, {1_v}),
         dasm(OPCODE_INVOKE_DIRECT, m_enum_util->ILLEGAL_ARG_CONSTRUCT_METHOD,
              {1_v, 0_v}),
         dasm(OPCODE_THROW, {1_v})});
    cfg.recompute_registers_size();
    code->clear_cfg();
  }

  /**
   * Substitute for LCandidateEnum;.name()
   *
   * public static String redex$OE$name(Integer obj) {
   *   switch(obj.intValue()) {
   *     case 0 : ...;
   *     case 1 : ...;
   *     ...
* } * } */ void create_name_method(DexMethodRef* ref) { MethodCreator mc(ref, ACC_STATIC | ACC_PUBLIC); auto method = mc.create(); auto cls = type_class(ref->get_class()); cls->add_method(method); auto code = method->get_code(); code->build_cfg(); auto& cfg = code->cfg(); auto entry = cfg.entry_block(); entry->push_back({dasm(OPCODE_INVOKE_VIRTUAL, m_enum_util->INTEGER_INTVALUE_METHOD, {0_v}), dasm(OPCODE_MOVE_RESULT, {0_v})}); std::vector<std::pair<int32_t, cfg::Block*>> cases; for (auto& pair : m_enum_attributes_map[ref->get_class()].get_ordered_names()) { auto block = cfg.create_block(); cases.emplace_back(pair.first, block); block->push_back({dasm(OPCODE_CONST_STRING, pair.second), dasm(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT, {1_v}), dasm(OPCODE_RETURN_OBJECT, {1_v})}); } // This goto edge should never be taken, but we need a goto edge because the // switch is not a valid way to end a method. A switch cannot end a block // because the on-device dex verifier is unable to prove if the switch is // exhaustive. // // Arbitrarily choose the first case block. cfg.create_branch(entry, dasm(OPCODE_SWITCH, {0_v}), cases.front().second, cases); cfg.recompute_registers_size(); code->clear_cfg(); } /** * Substitute for LCandidateEnum:.hashCode() * * Since `Enum.hashCode()` is not in the Java spec so that different JVMs * may have different implementations and since hashcodes are usually * only used as keys to hash maps we can choose one implementation. 
* https://android.googlesource.com/platform/libcore/+/9edf43dfcc35c761d97eb9156ac4254152ddbc55/libdvm/src/main/java/java/lang/Enum.java#118 * * * public static int redex$OE$hashCode(Integer obj) { * String name = CandidateEnum.name(obj); * return obj.intValue() + name.hashCode(); * } */ void create_hashcode_method(DexMethodRef* ref) { MethodCreator mc(ref, ACC_STATIC | ACC_PUBLIC); auto method = mc.create(); auto cls = type_class(ref->get_class()); cls->add_method(method); auto code = method->get_code(); code->build_cfg(); auto& cfg = code->cfg(); auto entry = cfg.entry_block(); auto name_method = m_enum_util->get_substitute_of_name(ref->get_class()); entry->push_back({ dasm(OPCODE_INVOKE_STATIC, name_method, {0_v}), dasm(OPCODE_MOVE_RESULT_OBJECT, {1_v}), dasm(OPCODE_INVOKE_VIRTUAL, m_enum_util->STRING_HASHCODE_METHOD, {1_v}), dasm(OPCODE_MOVE_RESULT, {1_v}), dasm(OPCODE_INVOKE_VIRTUAL, m_enum_util->INTEGER_INTVALUE_METHOD, {0_v}), dasm(OPCODE_MOVE_RESULT, {2_v}), dasm(OPCODE_ADD_INT, {1_v, 1_v, 2_v}), dasm(OPCODE_RETURN, {1_v}), }); cfg.recompute_registers_size(); code->clear_cfg(); } /** * Create a helper method to replace `iget` instructions that returns an * instance field value given the enum ordinal. * * public static [type] redex$OE$get_instanceField(Integer obj) { * switch (obj.intValue()) { * case 0: return value0; * case 1: return value1; * ... 
* } * } */ void create_get_instance_field_method(DexMethodRef* method_ref, DexFieldRef* ifield_ref) { MethodCreator mc(method_ref, ACC_STATIC | ACC_PUBLIC); auto method = mc.create(); auto cls = type_class(method_ref->get_class()); cls->add_method(method); auto code = method->get_code(); code->build_cfg(); auto& cfg = code->cfg(); auto entry = cfg.entry_block(); entry->push_back({dasm(OPCODE_INVOKE_VIRTUAL, m_enum_util->INTEGER_INTVALUE_METHOD, {0_v}), dasm(OPCODE_MOVE_RESULT, {0_v})}); auto ifield_type = ifield_ref->get_type(); std::vector<std::pair<int32_t, cfg::Block*>> cases; for (auto& pair : m_enum_attributes_map[cls->get_type()].m_field_map[ifield_ref]) { auto ordinal = pair.first; auto block = cfg.create_block(); cases.emplace_back(ordinal, block); if (ifield_type == type::java_lang_String()) { const DexString* value = pair.second.string_value; if (value) { block->push_back({dasm(OPCODE_CONST_STRING, value), dasm(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT, {1_v}), dasm(OPCODE_RETURN_OBJECT, {1_v})}); } else { // The `Ljava/lang/String` value is a `null` constant. block->push_back({dasm(OPCODE_CONST, {1_v, 0_L}), dasm(OPCODE_RETURN_OBJECT, {1_v})}); } } else { int64_t value = pair.second.primitive_value; if (type::is_wide_type(ifield_type)) { block->push_back({dasm(OPCODE_CONST_WIDE, {1_v, {LITERAL, value}}), dasm(OPCODE_RETURN_WIDE, {1_v})}); } else { block->push_back({dasm(OPCODE_CONST, {1_v, {LITERAL, value}}), dasm(OPCODE_RETURN, {1_v})}); } } } // Arbitrarily choose the first case block as the default case. always_assert(!cases.empty()); cfg.create_branch(entry, dasm(OPCODE_SWITCH, {0_v}), cases.front().second, cases); cfg.recompute_registers_size(); code->clear_cfg(); } /** * 1. Erase the enum instance fields and synthetic array field which is * usually `$VALUES`. * 2. Delete <init>, values() and valueOf(String) methods, and delete * instructions that construct these fields from <clinit>. 
*/ void clean_generated_methods_fields(DexClass* enum_cls) { auto& sfields = enum_cls->get_sfields(); auto& enum_constants = m_enum_attributes_map[enum_cls->get_type()].m_constants_map; auto synth_field_access = synth_access(); DexField* values_field = nullptr; std20::erase_if(sfields, [&](auto* field) { if (enum_constants.count(field)) { return true; } if (check_required_access_flags(synth_field_access, field->get_access())) { always_assert(!values_field); values_field = field; return true; } return false; }); always_assert(values_field); auto& dmethods = enum_cls->get_dmethods(); // Delete <init>, values() and valueOf(String) methods, and clean <clinit>. std20::erase_if(dmethods, [&, this](auto* method) { if (method::is_clinit(method)) { clean_clinit(enum_constants, enum_cls, method, values_field); return empty(method->get_code()); } return this->is_generated_enum_method(method); }); } /** * Erase enum construction code. Erase the put instructions that write enum * values and synthetic $VALUES array, then erase the dead instructions. * * The code before the transformation: * * new-instance v0 LCandidateEnum; * invoke-direct v0 v1 v2 Ljava/lang/Enum;.<init>:(Ljava/lang/String;I)V * sput-object v0 LCandidateEnum;.f:LCandidateEnum; * ... // maybe more objects construction. * sput-object v3 LCandidateEnum;.$VALUES:[LCandidateEnum; * ... // register v0 may be used. * * The code after the transformation: * * // Deleted. new-instance v0 LCandidateEnum; * // Deleted. invoke-direct v0 v1 v2 * Ljava/lang/Enum;.<init>:(Ljava/lang/String;I)V * // Deleted. sput-object v0 LCandidateEnum;.f:LCandidateEnum; * sget-object v0 LCandidateEnum;.f:LCandidateEnum; * ... // maybe more objects construction. * // Deleted. sput-object v3 LCandidateEnum;.$VALUES:[LCandidateEnum; * ... // register v0 may be used. 
*/ static void clean_clinit(const EnumConstantsMap& enum_constants, DexClass* enum_cls, DexMethod* clinit, DexField* values_field) { auto code = clinit->get_code(); auto ctors = enum_cls->get_ctors(); always_assert(ctors.size() == 1); auto ctor = ctors[0]; side_effects::InvokeToSummaryMap summaries; for (auto it = code->begin(); it != code->end();) { if (it->type != MFLOW_OPCODE) { ++it; continue; } auto insn = it->insn; if (opcode::is_an_sput(insn->opcode())) { auto field = resolve_field(insn->get_field()); if (field && enum_constants.count(field)) { code->insert_before(it, dasm(OPCODE_SGET_OBJECT, field)); code->insert_before( it, dasm(IOPCODE_MOVE_RESULT_PSEUDO_OBJECT, {{VREG, insn->src(0)}})); it = code->erase(it); } else if (field == values_field) { it = code->erase(it); } } else if (opcode::is_invoke_direct(insn->opcode()) && insn->get_method() == ctor) { summaries.emplace(insn, side_effects::Summary()); } ++it; } code->build_cfg(/* editable */ false); auto& cfg = code->cfg(); cfg.calculate_exit_block(); ptrs::FixpointIterator fp_iter(cfg); fp_iter.run(ptrs::Environment()); used_vars::FixpointIterator uv_fpiter(fp_iter, summaries, cfg); uv_fpiter.run(used_vars::UsedVarsSet()); auto dead_instructions = used_vars::get_dead_instructions(*code, uv_fpiter); code->clear_cfg(); for (const auto& insn : dead_instructions) { code->remove_opcode(insn); } // Assert no instruction about the $VALUES field. for (auto& mie : InstructionIterable(code)) { auto insn = mie.insn; always_assert_log(!insn->has_field() || insn->get_field() != values_field, "%s can not be deleted", SHOW(insn)); } } /** * Only use for <clinit> code. */ static bool empty(IRCode* code) { auto iterable = InstructionIterable(code); auto begin = iterable.begin(); return opcode::is_return_void(begin->insn->opcode()); } /** * Whether a method is <init>, values() or valueOf(String). 
*/ bool is_generated_enum_method(DexMethodRef* method) { auto name = method->get_name(); return name == m_enum_util->INIT_METHOD_STR || is_enum_values(method) || is_enum_valueof(method); } /** * Change candidates' superclass from Enum to Object. */ void post_update_enum_classes(Scope& scope) { for (auto cls : scope) { if (!m_enum_attributes_map.count(cls->get_type())) { continue; } always_assert_log(cls->get_super_class() == m_enum_util->ENUM_TYPE, "%s super %s\n", SHOW(cls), SHOW(cls->get_super_class())); cls->set_super_class(m_enum_util->OBJECT_TYPE); cls->set_access(cls->get_access() & ~ACC_ENUM); } } DexType* try_convert_to_int_type(DexType* type) { return m_enum_util->try_convert_to_int_type(m_enum_attributes_map, type); } DexStoresVector& m_stores; uint32_t m_int_objs{0}; // Generated Integer objects. uint32_t m_enum_objs{0}; // Eliminated Enum objects. EnumAttributeMap m_enum_attributes_map; std::unique_ptr<EnumUtil> m_enum_util; }; } // namespace namespace optimize_enums { /** * Transform enums to Integer objects, return the total number of eliminated * enum objects. */ int transform_enums(const Config& config, DexStoresVector* stores, size_t* num_int_objs) { if (!config.candidate_enums.size()) { return 0; } EnumTransformer transformer(config, stores); transformer.run(); *num_int_objs = transformer.get_int_objs_count(); return transformer.get_enum_objs_count(); } } // namespace optimize_enums