service/dataflow/ConstantUses.cpp (419 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* Background:
*
* Code contains many seemingly redundant const-instructions.
* However, the Android verifier checks how const
* values are used, and it will reject code that uses the same register
* inconsistently along any execution path, where inconsistently means that the
* register is used with conflicting type categories, e.g. once as an int, and
* then again as a float. At a high level, the Android verifier considers three
* type categories for <=32-bit values: int, float, object. All smaller integer
* types are "implicitly" widened to int. And for 64-bit values, there is long,
* double. Some instruction impose exact type demands, e.g. ADD_INT demands an
* int, not a float, and not an object. But IF_EQZ can be given an int or an
* object.
*
* In the actual Android verifier implementation, the exact tracked types are
* much more involved, considering all relevant smaller integer types and their
* relationships. See here:
* https://android.googlesource.com/platform/dalvik/+/android-cts-4.4_r4/vm/analysis/CodeVerify.cpp#186
* However, all that complexity in the Verifier exists to ensure that a value is
* never used in a context where certain bits should be 0 or 1. We can ignore
* these details for constants, as a constant is a constant is a constant.
*
* Our approach:
*
* If we have two const instructions loading the same bit patterns, we can drop
* one if they don't have mismatching type demands along all execution paths. To
* avoid doing a potentially expensive path-sensitive analysis for each pair of
* const instructions, this diff applies one major simplification to the
* problem: For each const instruction, we look at all type demands across all
* execution paths, and compute the intersection of all these demands. In this
* way, if two const instructions have the same combined type demands, then it
* is safe to eliminate one of them. This is quite conservative, but safe.
*
* Note that the combined type demands might be contradictory (represented as
* TypeDemand::Error). Consider the following instructions: const r0, 1234
* if-eqz r1, L1
* add-int r0, r0, r0
* goto L2
* :L1
* add-float r0, r0, r0
* :L2
*
* My understanding is that this is verifiable, as there is no inconsistent
* feasible execution path. However, the path-insensitive combined type demand
* for r0 is inconsistent, and thus r0 will not be considered for elimination.
* This is the most extreme aspect of how this approach to the problem is safe.
*
* A few details on how type demands are computed:
* - For return and return-wide instructions, we look at the method result type.
* - For the the value passed in to an aput or aput-wide we try to determine the
* incoming array type using TypeInference. (I am not sure if TypeInference
* can always decide the array type, due to some simplifications that might
* exist in our type inference implementation. The Android verifier certainly
* is able to always figure out the exact array type if the code verifies.
* Anyway, if in doubt, we can always safely bail out with TypeDemand::Error)
*/
#include "ConstantUses.h"
#include "DexUtil.h"
#include "Show.h"
#include "Trace.h"
namespace constant_uses {
ConstantUses::ConstantUses(const cfg::ControlFlowGraph& cfg, DexMethod* method)
: ConstantUses(cfg,
method ? is_static(method) : true,
method ? method->get_class() : nullptr,
method ? method->get_proto()->get_rtype() : nullptr,
method ? method->get_proto()->get_args() : nullptr,
[method]() { return show(method); }) {}
ConstantUses::ConstantUses(const cfg::ControlFlowGraph& cfg,
bool is_static,
DexType* declaring_type,
DexType* rtype,
DexTypeList* args,
const std::function<std::string()>& method_describer)
: m_reaching_definitions(cfg), m_rtype(rtype) {
m_reaching_definitions.run(reaching_defs::Environment());
bool need_type_inference = false;
for (cfg::Block* block : cfg.blocks()) {
auto env = m_reaching_definitions.get_entry_state_at(block);
for (auto& mie : InstructionIterable(block)) {
IRInstruction* insn = mie.insn;
for (size_t src_index = 0; src_index < insn->srcs_size(); src_index++) {
auto src = insn->src(src_index);
const auto& defs = env.get(src);
if (!defs.is_top() && !defs.is_bottom()) {
for (auto def : defs.elements()) {
auto def_opcode = def->opcode();
if (def_opcode == OPCODE_CONST || def_opcode == OPCODE_CONST_WIDE) {
m_constant_uses[def].emplace_back(insn, src_index);
// So there's an instruction that uses a const value.
// For some uses, get_type_demand(IRInstruction*, size_t) will
// need to know type inference information on operands.
// The following switch logic needs to be kept in sync with that
// actual usage of type inference information.
auto opcode = insn->opcode();
switch (opcode) {
case OPCODE_APUT:
case OPCODE_APUT_WIDE:
if (src_index == 0) {
need_type_inference = true;
}
break;
case OPCODE_IF_EQ:
case OPCODE_IF_NE:
case OPCODE_IF_EQZ:
case OPCODE_IF_NEZ:
need_type_inference = true;
break;
default:
break;
}
}
}
}
}
m_reaching_definitions.analyze_instruction(insn, &env);
}
}
TRACE(CU, 2, "[CU] ConstantUses(%s) need_type_inference:%u",
method_describer().c_str(), need_type_inference);
if (need_type_inference && args) {
m_type_inference.reset(new type_inference::TypeInference(cfg));
m_type_inference->run(is_static, declaring_type, args);
}
}
const std::vector<std::pair<IRInstruction*, size_t>>&
ConstantUses::get_constant_uses(IRInstruction* insn) const {
always_assert(insn->opcode() == OPCODE_CONST ||
insn->opcode() == OPCODE_CONST_WIDE);
auto it = m_constant_uses.find(insn);
return it != m_constant_uses.end() ? it->second : m_no_uses;
}
TypeDemand ConstantUses::get_constant_type_demand(IRInstruction* insn) const {
always_assert(insn->opcode() == OPCODE_CONST ||
insn->opcode() == OPCODE_CONST_WIDE);
TypeDemand type_demand = TypeDemand::None;
for (auto& p : get_constant_uses(insn)) {
type_demand =
(TypeDemand)(type_demand & get_type_demand(p.first, p.second));
if (type_demand == TypeDemand::Error) {
break;
}
}
TRACE(CU, 4, "[CU] type demand of {%s}: %u (%s %s %s %s %s)", SHOW(insn),
type_demand, type_demand & Int ? "Int" : "",
type_demand & Float ? "Float" : "",
type_demand & Object ? "Object" : "", type_demand & Long ? "Long" : "",
type_demand & Double ? "Double" : "");
if (insn->get_literal() != 0) {
type_demand = (TypeDemand)(type_demand & ~TypeDemand::Object);
}
return type_demand;
}
TypeDemand ConstantUses::get_type_demand(DexType* type) {
switch (type->c_str()[0]) {
case 'V':
not_reached();
case 'B':
case 'C':
case 'S':
case 'I':
case 'Z':
return TypeDemand::Int;
case 'J':
return TypeDemand::Long;
case 'F':
return TypeDemand::Float;
case 'D':
return TypeDemand::Double;
default:
return TypeDemand::Object;
}
}
static bool is_non_zero_int(IRType type) {
return type == SCALAR || type == INT || type == CONST;
}
TypeDemand ConstantUses::get_type_demand(IRInstruction* insn,
size_t src_index) const {
always_assert(src_index < insn->srcs_size());
switch (insn->opcode()) {
case OPCODE_GOTO:
case IOPCODE_LOAD_PARAM:
case IOPCODE_LOAD_PARAM_OBJECT:
case IOPCODE_LOAD_PARAM_WIDE:
case OPCODE_NOP:
case IOPCODE_MOVE_RESULT_PSEUDO:
case OPCODE_MOVE_RESULT:
case IOPCODE_MOVE_RESULT_PSEUDO_OBJECT:
case OPCODE_MOVE_RESULT_OBJECT:
case IOPCODE_MOVE_RESULT_PSEUDO_WIDE:
case OPCODE_MOVE_RESULT_WIDE:
case OPCODE_MOVE_EXCEPTION:
case OPCODE_RETURN_VOID:
case OPCODE_CONST:
case OPCODE_CONST_WIDE:
case OPCODE_CONST_STRING:
case OPCODE_CONST_CLASS:
case OPCODE_NEW_INSTANCE:
case OPCODE_SGET:
case OPCODE_SGET_BOOLEAN:
case OPCODE_SGET_BYTE:
case OPCODE_SGET_CHAR:
case OPCODE_SGET_SHORT:
case OPCODE_SGET_WIDE:
case OPCODE_SGET_OBJECT:
case IOPCODE_INIT_CLASS:
not_reached();
case OPCODE_RETURN:
case OPCODE_RETURN_WIDE:
return m_rtype ? get_type_demand(m_rtype) : TypeDemand::Error;
case OPCODE_MOVE:
return TypeDemand::IntOrFloat;
case OPCODE_MOVE_WIDE:
return TypeDemand::LongOrDouble;
case OPCODE_MOVE_OBJECT:
case OPCODE_RETURN_OBJECT:
case OPCODE_MONITOR_ENTER:
case OPCODE_MONITOR_EXIT:
case OPCODE_ARRAY_LENGTH:
case OPCODE_FILL_ARRAY_DATA:
case OPCODE_THROW:
case OPCODE_IGET:
case OPCODE_IGET_BOOLEAN:
case OPCODE_IGET_BYTE:
case OPCODE_IGET_CHAR:
case OPCODE_IGET_SHORT:
case OPCODE_IGET_WIDE:
case OPCODE_IGET_OBJECT:
return TypeDemand::Object;
case OPCODE_CHECK_CAST:
// In the Android verifier, the check-cast instruction updates the assumed
// exact type on the incoming register, even in the case of a zero constant.
// We don't track exact types here, and just bail out.
return TypeDemand::Error;
case OPCODE_INSTANCE_OF:
// The Android verifier in some ART versions match a pattern of
// instance-of + ifXXX, and then may strengthen assumptions on the incoming
// register, even in the case of a zero constant.
// https://android.googlesource.com/platform/art/+/refs/tags/android-10.0.0_r5/runtime/verifier/method_verifier.cc#2683
// We don't track exact types here, and certainly don't want to deal with
// somewhat fragile pattern matching, and so just bail out.
return TypeDemand::Error;
case OPCODE_NEW_ARRAY:
case OPCODE_SWITCH:
case OPCODE_NEG_INT:
case OPCODE_NOT_INT:
case OPCODE_INT_TO_BYTE:
case OPCODE_INT_TO_CHAR:
case OPCODE_INT_TO_SHORT:
case OPCODE_INT_TO_LONG:
case OPCODE_INT_TO_FLOAT:
case OPCODE_INT_TO_DOUBLE:
case OPCODE_ADD_INT:
case OPCODE_SUB_INT:
case OPCODE_MUL_INT:
case OPCODE_AND_INT:
case OPCODE_OR_INT:
case OPCODE_XOR_INT:
case OPCODE_SHL_INT:
case OPCODE_SHR_INT:
case OPCODE_USHR_INT:
case OPCODE_DIV_INT:
case OPCODE_REM_INT:
case OPCODE_ADD_INT_LIT16:
case OPCODE_RSUB_INT:
case OPCODE_MUL_INT_LIT16:
case OPCODE_AND_INT_LIT16:
case OPCODE_OR_INT_LIT16:
case OPCODE_XOR_INT_LIT16:
case OPCODE_ADD_INT_LIT8:
case OPCODE_RSUB_INT_LIT8:
case OPCODE_MUL_INT_LIT8:
case OPCODE_AND_INT_LIT8:
case OPCODE_OR_INT_LIT8:
case OPCODE_XOR_INT_LIT8:
case OPCODE_SHL_INT_LIT8:
case OPCODE_SHR_INT_LIT8:
case OPCODE_USHR_INT_LIT8:
case OPCODE_DIV_INT_LIT16:
case OPCODE_REM_INT_LIT16:
case OPCODE_DIV_INT_LIT8:
case OPCODE_REM_INT_LIT8:
return TypeDemand::Int;
case OPCODE_FILLED_NEW_ARRAY: {
DexType* component_type = type::get_array_component_type(insn->get_type());
return get_type_demand(component_type);
}
case OPCODE_CMPL_FLOAT:
case OPCODE_CMPG_FLOAT:
case OPCODE_NEG_FLOAT:
case OPCODE_FLOAT_TO_INT:
case OPCODE_FLOAT_TO_LONG:
case OPCODE_FLOAT_TO_DOUBLE:
case OPCODE_ADD_FLOAT:
case OPCODE_SUB_FLOAT:
case OPCODE_MUL_FLOAT:
case OPCODE_DIV_FLOAT:
case OPCODE_REM_FLOAT:
return TypeDemand::Float;
case OPCODE_CMPL_DOUBLE:
case OPCODE_CMPG_DOUBLE:
case OPCODE_NEG_DOUBLE:
case OPCODE_DOUBLE_TO_INT:
case OPCODE_DOUBLE_TO_LONG:
case OPCODE_DOUBLE_TO_FLOAT:
case OPCODE_ADD_DOUBLE:
case OPCODE_SUB_DOUBLE:
case OPCODE_MUL_DOUBLE:
case OPCODE_DIV_DOUBLE:
case OPCODE_REM_DOUBLE:
return TypeDemand::Double;
case OPCODE_CMP_LONG:
case OPCODE_NEG_LONG:
case OPCODE_NOT_LONG:
case OPCODE_LONG_TO_INT:
case OPCODE_LONG_TO_FLOAT:
case OPCODE_LONG_TO_DOUBLE:
case OPCODE_ADD_LONG:
case OPCODE_SUB_LONG:
case OPCODE_MUL_LONG:
case OPCODE_AND_LONG:
case OPCODE_OR_LONG:
case OPCODE_XOR_LONG:
case OPCODE_DIV_LONG:
case OPCODE_REM_LONG:
return TypeDemand::Long;
case OPCODE_SHL_LONG:
case OPCODE_SHR_LONG:
case OPCODE_USHR_LONG:
if (src_index == 0) return TypeDemand::Long;
always_assert(src_index == 1);
return TypeDemand::Int;
case OPCODE_IF_EQ:
case OPCODE_IF_NE:
if (m_type_inference) {
auto& type_environments = m_type_inference->get_type_environments();
auto& type_environment = type_environments.at(insn);
auto t1 = type_environment.get_type(insn->src(0));
auto t2 = type_environment.get_type(insn->src(1));
if (!t1.is_top() && !t1.is_bottom() && !t2.is_top() && !t2.is_bottom()) {
if (t1.element() == REFERENCE || t2.element() == REFERENCE) {
return TypeDemand::Object;
}
if (is_non_zero_int(t1.element()) || is_non_zero_int(t2.element())) {
return TypeDemand::Int;
}
return TypeDemand::IntOrObject;
}
} else {
TRACE(CU, 3,
"[CU] if-eq or if-ne instruction encountered {%s}, but type "
"inference is unavailable",
SHOW(insn));
}
return TypeDemand::Error;
case OPCODE_IF_EQZ:
case OPCODE_IF_NEZ:
if (m_type_inference) {
auto& type_environments = m_type_inference->get_type_environments();
auto& type_environment = type_environments.at(insn);
auto t = type_environment.get_type(insn->src(0));
if (!t.is_top() && !t.is_bottom()) {
if (t.element() == REFERENCE) {
return TypeDemand::Object;
}
if (is_non_zero_int(t.element())) {
return TypeDemand::Int;
}
return TypeDemand::IntOrObject;
}
} else {
TRACE(CU, 3,
"[CU] if-eqz or if-nez instruction encountered {%s}, but type "
"inference is unavailable",
SHOW(insn));
}
return TypeDemand::Error;
case OPCODE_IF_LTZ:
case OPCODE_IF_GEZ:
case OPCODE_IF_GTZ:
case OPCODE_IF_LEZ:
return TypeDemand::IntOrObject;
case OPCODE_IF_LT:
case OPCODE_IF_GE:
case OPCODE_IF_GT:
case OPCODE_IF_LE:
return TypeDemand::Int;
case OPCODE_AGET:
case OPCODE_AGET_BOOLEAN:
case OPCODE_AGET_BYTE:
case OPCODE_AGET_CHAR:
case OPCODE_AGET_SHORT:
case OPCODE_AGET_WIDE:
case OPCODE_AGET_OBJECT:
if (src_index == 0) return TypeDemand::Object;
always_assert(src_index == 1);
return TypeDemand::Int;
case OPCODE_APUT:
case OPCODE_APUT_BOOLEAN:
case OPCODE_APUT_BYTE:
case OPCODE_APUT_CHAR:
case OPCODE_APUT_SHORT:
case OPCODE_APUT_WIDE:
case OPCODE_APUT_OBJECT:
if (src_index == 1) return TypeDemand::Object;
if (src_index == 2) return TypeDemand::Int;
always_assert(src_index == 0);
switch (insn->opcode()) {
case OPCODE_APUT:
case OPCODE_APUT_WIDE: {
if (m_type_inference) {
auto& type_environments = m_type_inference->get_type_environments();
auto& type_environment = type_environments.at(insn);
auto dex_type = type_environment.get_dex_type(insn->src(1));
TRACE(CU, 3, "[CU] aput(-wide) instruction array type: %s",
dex_type ? SHOW(dex_type) : "(unknown dex type)");
if (dex_type && type::is_array(*dex_type)) {
auto type_demand =
get_type_demand(type::get_array_component_type(*dex_type));
always_assert(insn->opcode() != OPCODE_APUT ||
(type_demand == TypeDemand::Error ||
type_demand == TypeDemand::Int ||
type_demand == TypeDemand::Float));
always_assert(insn->opcode() != OPCODE_APUT_WIDE ||
(type_demand == TypeDemand::Error ||
type_demand == TypeDemand::Long ||
type_demand == TypeDemand::Double));
return type_demand;
}
} else {
TRACE(CU, 3,
"[CU] aput(-wide) instruction encountered {%s}, but type "
"inference is unavailable",
SHOW(insn));
}
return TypeDemand::Error;
}
case OPCODE_APUT_BOOLEAN:
case OPCODE_APUT_BYTE:
case OPCODE_APUT_CHAR:
case OPCODE_APUT_SHORT:
return TypeDemand::Int;
case OPCODE_APUT_OBJECT:
return TypeDemand::Object;
default:
not_reached();
}
case OPCODE_IPUT:
case OPCODE_IPUT_BOOLEAN:
case OPCODE_IPUT_BYTE:
case OPCODE_IPUT_CHAR:
case OPCODE_IPUT_SHORT:
case OPCODE_IPUT_WIDE:
case OPCODE_IPUT_OBJECT:
if (src_index == 1) return TypeDemand::Object;
always_assert(src_index == 0);
return get_type_demand(insn->get_field()->get_type());
case OPCODE_SPUT:
case OPCODE_SPUT_BOOLEAN:
case OPCODE_SPUT_BYTE:
case OPCODE_SPUT_CHAR:
case OPCODE_SPUT_SHORT:
case OPCODE_SPUT_WIDE:
case OPCODE_SPUT_OBJECT:
return get_type_demand(insn->get_field()->get_type());
case OPCODE_INVOKE_VIRTUAL:
case OPCODE_INVOKE_SUPER:
case OPCODE_INVOKE_DIRECT:
case OPCODE_INVOKE_STATIC:
case OPCODE_INVOKE_INTERFACE: {
DexMethodRef* dex_method = insn->get_method();
const auto* arg_types = dex_method->get_proto()->get_args();
size_t expected_args =
(insn->opcode() != OPCODE_INVOKE_STATIC ? 1 : 0) + arg_types->size();
always_assert(insn->srcs_size() == expected_args);
if (insn->opcode() != OPCODE_INVOKE_STATIC) {
// The first argument is a reference to the object instance on which the
// method is invoked.
if (src_index-- == 0) return TypeDemand::Object;
}
return get_type_demand(arg_types->at(src_index));
}
case OPCODE_INVOKE_CUSTOM:
case OPCODE_INVOKE_POLYMORPHIC:
not_reached_log(
"Unsupported instruction {%s} in ConstantUses::get_type_demand\n",
SHOW(insn));
}
}
bool ConstantUses::has_type_inference() const { return !!m_type_inference; }
} // namespace constant_uses