libredex/ReflectionAnalysis.cpp (1,061 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "ReflectionAnalysis.h"
#include <iomanip>
#include <ostream>
#include <unordered_map>
#include <boost/optional.hpp>
#include "BaseIRAnalyzer.h"
#include "ControlFlow.h"
#include "FiniteAbstractDomain.h"
#include "HashedSetAbstractDomain.h"
#include "IRCode.h"
#include "IRInstruction.h"
#include "IROpcode.h"
#include "PatriciaTreeMapAbstractEnvironment.h"
#include "ReducedProductAbstractDomain.h"
#include "Resolver.h"
#include "Show.h"
#include "Trace.h"
using namespace sparta;
// Prints a set of types as "(T1,T2,...)". An empty set produces no output at
// all (not even the parentheses).
std::ostream& operator<<(std::ostream& out,
                         const std::unordered_set<DexType*>& x) {
  if (!x.empty()) {
    // The opening parenthesis precedes the first element; a comma precedes
    // every subsequent one.
    const char* prefix = "(";
    for (DexType* type : x) {
      out << prefix;
      out << SHOW(type);
      prefix = ",";
    }
    out << ")";
  }
  return out;
}
// Renders an abstract object as "<KIND>{...}" according to its kind. STRING
// objects are printed as a quoted literal, or as nothing when the string value
// is unknown.
std::ostream& operator<<(std::ostream& out,
                         const reflection::AbstractObject& x) {
  switch (x.obj_kind) {
  case reflection::OBJECT:
    return out << "OBJECT{" << SHOW(x.dex_type) << x.potential_dex_types
               << "}";
  case reflection::INT: {
    out << "INT{";
    out << (x.dex_int ? std::to_string(*x.dex_int) : "none");
    return out << "}";
  }
  case reflection::STRING: {
    if (x.dex_string == nullptr) {
      // Unknown string value: nothing is printed.
      return out;
    }
    const std::string& str = x.dex_string->str();
    if (!str.empty()) {
      out << std::quoted(str);
    } else {
      out << "\"\"";
    }
    return out;
  }
  case reflection::CLASS:
    return out << "CLASS{" << SHOW(x.dex_type) << x.potential_dex_types << "}";
  case reflection::FIELD:
    return out << "FIELD{" << SHOW(x.dex_type) << x.potential_dex_types << ":"
               << SHOW(x.dex_string) << "}";
  case reflection::METHOD: {
    out << "METHOD{" << SHOW(x.dex_type) << x.potential_dex_types << ":"
        << SHOW(x.dex_string);
    if (x.dex_type_array) {
      // Parameter types are emitted back to back; unknown entries show as "?".
      out << "(";
      for (auto type : *x.dex_type_array) {
        out << (type ? type->str() : "?");
      }
      out << ")";
    }
    return out << "}";
  }
  }
  return out;
}
// Prints "TOP", "BOTTOM", or the wrapped abstract object.
std::ostream& operator<<(std::ostream& out,
                         const reflection::AbstractObjectDomain& x) {
  if (x.is_top()) {
    return out << "TOP";
  }
  if (x.is_bottom()) {
    return out << "BOTTOM";
  }
  return out << *(x.get_object());
}
// Prints the enumerator name of a ClassObjectSource value.
std::ostream& operator<<(std::ostream& out,
                         const reflection::ClassObjectSource& cls_src) {
  switch (cls_src) {
  case reflection::NON_REFLECTION:
    return out << "NON_REFLECTION";
  case reflection::REFLECTION:
    return out << "REFLECTION";
  }
  // Any other value prints nothing.
  return out;
}
// Prints the abstract object and, for CLASS objects with a known source, the
// source in parentheses.
std::ostream& operator<<(std::ostream& out,
                         const reflection::ReflectionAbstractObject& aobj) {
  out << aobj.first;
  const bool is_class = aobj.first.obj_kind == reflection::CLASS;
  if (!is_class || !aobj.second) {
    return out;
  }
  out << "(" << *aobj.second << ")";
  return out;
}
// Prints reflection sites as
//   [insn -> {(reg, obj), (reg, obj)}, insn -> {...}]
std::ostream& operator<<(std::ostream& out,
                         const reflection::ReflectionSites& sites) {
  out << "[";
  bool need_site_separator = false;
  for (const auto& site : sites) {
    if (need_site_separator) {
      out << ", ";
    }
    need_site_separator = true;
    out << show(site.first) << " -> {";
    bool need_reg_separator = false;
    for (const auto& entry : site.second) {
      if (need_reg_separator) {
        out << ", ";
      }
      need_reg_separator = true;
      out << "(" << show(entry.first) << ", " << entry.second << ")";
    }
    out << "}";
  }
  out << "]";
  return out;
}
namespace reflection {
// Hands out a fresh abstract heap address on every call. Numbering starts at
// 1; elsewhere in this file an address of 0 is treated as "no known address".
// NOTE(review): the counter is a plain function-local static with no
// synchronization - confirm callers never allocate concurrently.
AbstractHeapAddress allocate_heap_address() {
  static AbstractHeapAddress next_address = 1;
  const AbstractHeapAddress fresh = next_address;
  ++next_address;
  return fresh;
}
// Returns true for the kinds that are never reported as reflection results
// (OBJECT, INT and STRING); every other kind yields false.
bool is_not_reflection_output(const AbstractObject& obj) {
  const auto kind = obj.obj_kind;
  return kind == reflection::OBJECT || kind == reflection::INT ||
         kind == reflection::STRING;
}
// Structural equality between two abstract objects. Objects of different
// kinds are never equal; for equal kinds only the fields that are meaningful
// for that kind take part in the comparison.
bool operator==(const AbstractObject& x, const AbstractObject& y) {
  if (x.obj_kind != y.obj_kind) {
    return false;
  }
  switch (x.obj_kind) {
  case INT: {
    return x.dex_int == y.dex_int;
  }
  case OBJECT: {
    return x.dex_type == y.dex_type &&
           x.potential_dex_types == y.potential_dex_types &&
           x.heap_address == y.heap_address &&
           x.dex_type_array == y.dex_type_array;
  }
  case CLASS: {
    return x.dex_type == y.dex_type &&
           x.potential_dex_types == y.potential_dex_types;
  }
  case STRING: {
    return x.dex_string == y.dex_string;
  }
  case FIELD: {
    return x.dex_type == y.dex_type &&
           x.potential_dex_types == y.potential_dex_types &&
           x.dex_string == y.dex_string;
  }
  case METHOD: {
    return x.dex_type == y.dex_type &&
           x.potential_dex_types == y.potential_dex_types &&
           x.dex_string == y.dex_string && x.dex_type_array == y.dex_type_array;
  }
  }
  // Every enumerator returns above. Without this, an out-of-range obj_kind
  // would flow off the end of a non-void function, which is undefined
  // behavior.
  not_reached();
}
// Negation of operator== for abstract objects.
bool operator!=(const AbstractObject& x, const AbstractObject& y) {
  const bool same = (x == y);
  return !same;
}
// Partial order on abstract objects. `*this` is below `other` when both have
// the same kind and `other` is a "general" object of that kind (one whose
// identifying information has been dropped), or when the two are equal.
bool AbstractObject::leq(const AbstractObject& other) const {
  if (obj_kind != other.obj_kind) {
    return equals(other);
  }
  bool other_is_general = false;
  switch (obj_kind) {
  case AbstractObjectKind::INT:
    other_is_general = (other.dex_int == boost::none);
    break;
  case AbstractObjectKind::CLASS:
  case AbstractObjectKind::OBJECT:
    other_is_general =
        (dex_type && other.dex_type == nullptr) ||
        (dex_type_array && other.dex_type_array == boost::none) ||
        (heap_address && other.heap_address == 0);
    break;
  case AbstractObjectKind::STRING:
    other_is_general = (other.dex_string == nullptr);
    break;
  case AbstractObjectKind::FIELD:
    other_is_general =
        (other.dex_type == nullptr && other.dex_string == nullptr);
    break;
  case AbstractObjectKind::METHOD:
    other_is_general =
        (other.dex_type == nullptr && other.dex_string == nullptr) ||
        (dex_type_array && other.dex_type_array == boost::none);
    break;
  }
  return other_is_general || equals(other);
}
// Member-function form of operator==.
bool AbstractObject::equals(const AbstractObject& other) const {
  const bool same = (*this == other);
  return same;
}
// Joins `other` into `*this`. If `other` is already below us nothing changes.
// Objects of different kinds join to Top. Otherwise the kind is kept and all
// identifying information is conservatively discarded.
sparta::AbstractValueKind AbstractObject::join_with(
    const AbstractObject& other) {
  const bool already_covers_other = other.leq(*this);
  if (already_covers_other) {
    // We are higher on the lattice.
    return sparta::AbstractValueKind::Value;
  }
  if (obj_kind != other.obj_kind) {
    return sparta::AbstractValueKind::Top;
  }
  switch (obj_kind) {
  case AbstractObjectKind::STRING:
    // Forget the string value.
    dex_string = nullptr;
    break;
  case AbstractObjectKind::INT:
    // Forget the integer value.
    dex_int = boost::none;
    break;
  case AbstractObjectKind::FIELD:
  case AbstractObjectKind::METHOD:
    // Forget the owning type, member name and parameter types.
    potential_dex_types.clear();
    dex_type_array = boost::none;
    dex_string = nullptr;
    dex_type = nullptr;
    break;
  case AbstractObjectKind::OBJECT:
  case AbstractObjectKind::CLASS:
    // Forget the type information and the heap identity.
    potential_dex_types.clear();
    dex_type_array = boost::none;
    heap_address = 0;
    dex_type = nullptr;
    break;
  }
  return sparta::AbstractValueKind::Value;
}
// Meets `other` into `*this`: keeps whichever of the two is lower in the
// order defined by leq(), or reports Bottom when they are incomparable.
sparta::AbstractValueKind AbstractObject::meet_with(
    const AbstractObject& other) {
  if (!leq(other)) {
    if (!other.leq(*this)) {
      return sparta::AbstractValueKind::Bottom;
    }
    // `other` is the lower element; adopt it.
    *this = other;
  }
  // Either we were already the lower element or we just became `other`.
  return sparta::AbstractValueKind::Value;
}
namespace impl {
using namespace ir_analyzer;
// Constant domain over the origin of a Class object (REFLECTION vs.
// NON_REFLECTION).
using ClassObjectSourceDomain =
sparta::ConstantAbstractDomain<ClassObjectSource>;
// Maps each register to the abstract object it holds.
using BasicAbstractObjectEnvironment =
PatriciaTreeMapAbstractEnvironment<reg_t, AbstractObjectDomain>;
// Maps each register to the source of the Class object it holds.
using ClassObjectSourceEnvironment =
PatriciaTreeMapAbstractEnvironment<reg_t, ClassObjectSourceDomain>;
// Maps an abstract heap address to the constant contents of the
// java.lang.Class array allocated at that address.
using HeapClassArrayEnvironment = PatriciaTreeMapAbstractEnvironment<
AbstractHeapAddress,
ConstantAbstractDomain<std::vector<DexType*>>>;
// Abstract value returned by the analyzed method.
using ReturnValueDomain = AbstractObjectDomain;
// Abstract state tracked at every program point. It is a product of five
// components, addressed by index:
//   0: register -> abstract object
//   1: register -> source of the Class object held in the register
//   2: abstract heap address -> known contents of a Class[] array
//   3: abstract return value of the method
//   4: invoke instruction -> calling context (abstract arguments)
//
// The mutators take their arguments by const reference and the lambdas handed
// to apply() capture by reference, so a domain (or std::function) is not
// copied once into the parameter and again into the closure. This relies on
// apply() invoking the callable before it returns.
class AbstractObjectEnvironment final
    : public ReducedProductAbstractDomain<AbstractObjectEnvironment,
                                          BasicAbstractObjectEnvironment,
                                          ClassObjectSourceEnvironment,
                                          HeapClassArrayEnvironment,
                                          ReturnValueDomain,
                                          CallingContextMap> {
 public:
  using ReducedProductAbstractDomain::ReducedProductAbstractDomain;

  // No cross-component reduction is performed.
  static void reduce_product(std::tuple<BasicAbstractObjectEnvironment,
                                        ClassObjectSourceEnvironment,
                                        HeapClassArrayEnvironment,
                                        ReturnValueDomain,
                                        CallingContextMap>& /* product */) {}

  // Abstract object currently bound to `reg`.
  AbstractObjectDomain get_abstract_obj(reg_t reg) const {
    return get<0>().get(reg);
  }

  // Binds `reg` to `aobj`.
  void set_abstract_obj(reg_t reg, const AbstractObjectDomain& aobj) {
    apply<0>([&](auto env) { env->set(reg, aobj); }, true);
  }

  // Replaces the binding of `reg` with `operation(current binding)`.
  void update_abstract_obj(
      reg_t reg,
      const std::function<AbstractObjectDomain(const AbstractObjectDomain&)>&
          operation) {
    apply<0>([&](auto env) { env->update(reg, operation); }, true);
  }

  // Source of the Class object held in `reg`.
  ClassObjectSourceDomain get_class_source(reg_t reg) const {
    return get<1>().get(reg);
  }

  // Records the source of the Class object held in `reg`.
  void set_class_source(reg_t reg, const ClassObjectSourceDomain& cls_src) {
    apply<1>([&](auto env) { env->set(reg, cls_src); }, true);
  }

  // Known contents of the Class[] array at heap address `addr`.
  ConstantAbstractDomain<std::vector<DexType*>> get_heap_class_array(
      AbstractHeapAddress addr) const {
    return get<2>().get(addr);
  }

  // Records the contents of the Class[] array at heap address `addr`.
  void set_heap_class_array(
      AbstractHeapAddress addr,
      const ConstantAbstractDomain<std::vector<DexType*>>& array) {
    apply<2>([&](auto env) { env->set(addr, array); }, true);
  }

  // Forgets everything known about the array at heap address `addr`.
  void set_heap_addr_to_top(AbstractHeapAddress addr) {
    auto domain = get_heap_class_array(addr);
    domain.set_to_top();
    set_heap_class_array(addr, domain);
  }

  ReturnValueDomain get_return_value() const { return get<3>(); }

  // Joins `domain` into the return-value component.
  void join_return_value(const ReturnValueDomain& domain) {
    apply<3>([&](auto original) { original->join_with(domain); }, true);
  }

  CallingContextMap get_calling_context_partition() const { return get<4>(); }

  // Associates the invoke instruction `insn` with its calling context.
  void set_calling_context(const IRInstruction* insn,
                           const CallingContext& context) {
    apply<4>([&](auto partition) { partition->set(insn, context); }, true);
  }
};
class Analyzer final : public BaseIRAnalyzer<AbstractObjectEnvironment> {
public:
// Sets up an analyzer for `dex_method` over its control-flow graph `cfg`.
// `summary_query_fn` may be null, in which case no callee summaries are
// consulted. `cache` is dereferenced during the analysis without a null
// check. All pointers and the graph are stored, not copied.
explicit Analyzer(const DexMethod* dex_method,
                  const cfg::ControlFlowGraph& cfg,
                  SummaryQueryFn* summary_query_fn,
                  const MetadataCache* cache)
    : BaseIRAnalyzer(cfg),
      m_dex_method(dex_method),
      m_cfg(cfg),
      m_summary_query_fn(summary_query_fn),
      m_cache(cache) {
}
// Runs the fixpoint analysis and then caches the abstract state in front of
// every instruction.
//
// `context` may be null. When given, it supplies abstract objects for the
// parameters, indexed by parameter position (the position counts `this`).
void run(CallingContext* context) {
  // We need to compute the initial environment by assigning the parameter
  // registers their correct abstract object derived from the method's
  // signature. The IOPCODE_LOAD_PARAM_* instructions are pseudo-operations
  // that are used to specify the formal parameters of the method. They must
  // be interpreted separately.
  //
  // Note that we do not try to infer them as STRINGs, since we don't have the
  // actual value of the string other than their type being String. Also for
  // CLASSes, the exact Java type they refer to is not available here.
  auto init_state = AbstractObjectEnvironment::top();
  m_return_value.set_to_bottom();
  const auto* signature = m_dex_method->get_proto()->get_args();
  // Cursor into the declared argument types. It has to advance for every
  // declared parameter, including primitives, so that it stays aligned with
  // the load-param instruction being processed.
  auto sig_it = signature->begin();
  param_index_t param_position = 0;
  for (const auto& mie :
       InstructionIterable(m_cfg.get_param_instructions())) {
    IRInstruction* insn = mie.insn;
    switch (insn->opcode()) {
    case IOPCODE_LOAD_PARAM_OBJECT: {
      if (param_position == 0 && !is_static(m_dex_method)) {
        // If the method is not static, the first parameter corresponds to
        // `this`, which does not appear in the signature.
        update_non_string_input(&init_state, insn, m_dex_method->get_class());
      } else {
        // This is a regular parameter of the method. Check the cursor
        // before dereferencing it.
        always_assert(sig_it != signature->end());
        DexType* type = *sig_it;
        ++sig_it;
        bool taken_from_context = false;
        if (context != nullptr) {
          const AbstractObjectDomain param_abstract_obj =
              context->get(param_position);
          if (param_abstract_obj.is_value()) {
            // Parameter domain is provided with the calling context.
            init_state.set_abstract_obj(insn->dest(), param_abstract_obj);
            taken_from_context = true;
          }
        }
        if (!taken_from_context) {
          update_non_string_input(&init_state, insn, type);
        }
      }
      param_position++;
      break;
    }
    case IOPCODE_LOAD_PARAM:
    case IOPCODE_LOAD_PARAM_WIDE: {
      // A primitive parameter is always a declared argument (`this` is an
      // object), so consume its signature entry; otherwise a later object
      // parameter would be given this parameter's type.
      always_assert(sig_it != signature->end());
      ++sig_it;
      default_semantics(insn, &init_state);
      param_position++;
      break;
    }
    default:
      not_reached();
    }
  }
  MonotonicFixpointIterator::run(init_state);
  populate_environments(m_cfg);
}
// Transfer function: updates `current_state` with the effect of `insn`.
//
// For invoke instructions the abstract arguments are first recorded as the
// calling context of the call site, and the optional summary callback is asked
// for the callee's abstract return value. The opcode-specific handling then
// models the reflection-related operations; everything else falls back to
// default_semantics(), which clobbers the written registers.
void analyze_instruction(
    const IRInstruction* insn,
    AbstractObjectEnvironment* current_state) const override {
  AbstractObjectDomain callee_return;
  callee_return.set_to_bottom();
  if (opcode::is_an_invoke(insn->opcode())) {
    CallingContext cc;
    auto srcs = insn->srcs();
    for (param_index_t i = 0; i < srcs.size(); i++) {
      reg_t src = insn->src(i);
      auto aobj = current_state->get_abstract_obj(src);
      cc.set(i, aobj);
    }
    if (!cc.is_bottom()) {
      current_state->set_calling_context(insn, cc);
    }
    if (m_summary_query_fn) {
      callee_return = (*m_summary_query_fn)(insn);
    }
  }
  switch (insn->opcode()) {
  case IOPCODE_LOAD_PARAM:
  case IOPCODE_LOAD_PARAM_OBJECT:
  case IOPCODE_LOAD_PARAM_WIDE: {
    // IOPCODE_LOAD_PARAM_* instructions have been processed before the
    // analysis.
    break;
  }
  case OPCODE_MOVE:
  case OPCODE_MOVE_OBJECT: {
    // Copy the abstract object and, for Class objects, its source.
    const auto aobj = current_state->get_abstract_obj(insn->src(0));
    current_state->set_abstract_obj(insn->dest(), aobj);
    const auto obj = aobj.get_object();
    if (obj && obj->obj_kind == AbstractObjectKind::CLASS) {
      current_state->set_class_source(
          insn->dest(), current_state->get_class_source(insn->src(0)));
    }
    break;
  }
  case IOPCODE_MOVE_RESULT_PSEUDO_OBJECT:
  case OPCODE_MOVE_RESULT_OBJECT: {
    // Same as a move, with RESULT_REGISTER as the source.
    const auto aobj = current_state->get_abstract_obj(RESULT_REGISTER);
    current_state->set_abstract_obj(insn->dest(), aobj);
    const auto obj = aobj.get_object();
    if (obj && obj->obj_kind == AbstractObjectKind::CLASS) {
      current_state->set_class_source(
          insn->dest(), current_state->get_class_source(RESULT_REGISTER));
    }
    break;
  }
  case OPCODE_CONST: {
    current_state->set_abstract_obj(
        insn->dest(),
        AbstractObjectDomain(AbstractObject(insn->get_literal())));
    break;
  }
  case OPCODE_CONST_STRING: {
    current_state->set_abstract_obj(
        RESULT_REGISTER,
        AbstractObjectDomain(AbstractObject(insn->get_string())));
    break;
  }
  case OPCODE_CONST_CLASS: {
    auto aobj = AbstractObject(AbstractObjectKind::CLASS, insn->get_type());
    current_state->set_abstract_obj(RESULT_REGISTER,
                                    AbstractObjectDomain(aobj));
    current_state->set_class_source(
        RESULT_REGISTER,
        ClassObjectSourceDomain(ClassObjectSource::REFLECTION));
    break;
  }
  case OPCODE_CHECK_CAST: {
    const auto aobj = current_state->get_abstract_obj(insn->src(0));
    current_state->set_abstract_obj(
        RESULT_REGISTER,
        AbstractObjectDomain(
            AbstractObject(AbstractObjectKind::OBJECT, insn->get_type())));
    const auto obj = aobj.get_object();
    if (obj && obj->obj_kind == AbstractObjectKind::CLASS) {
      current_state->set_class_source(
          RESULT_REGISTER, current_state->get_class_source(insn->src(0)));
    }
    // Note that this is sound. In a concrete execution, if the check-cast
    // operation fails, an exception is thrown and the control point
    // following the check-cast becomes unreachable, which corresponds to
    // _|_ in the abstract domain. Any abstract state is a sound
    // approximation of _|_.
    break;
  }
  case OPCODE_INSTANCE_OF: {
    const auto aobj = current_state->get_abstract_obj(insn->src(0));
    auto obj = aobj.get_object();
    // Append the referenced type here to the potential dex types list.
    // Doing this increases the type information we have at the reflection
    // site. It's up to the user of the analysis how to interpret this
    // information.
    if (obj && (obj->obj_kind == AbstractObjectKind::OBJECT) &&
        obj->dex_type) {
      auto dex_type = insn->get_type();
      if (obj->dex_type != dex_type) {
        obj->potential_dex_types.insert(dex_type);
        current_state->set_abstract_obj(
            insn->src(0),
            AbstractObjectDomain(AbstractObject(obj->obj_kind, obj->dex_type,
                                                obj->potential_dex_types)));
      }
    }
    break;
  }
  case OPCODE_AGET_OBJECT: {
    // If the array's type is known, the loaded element gets the component
    // type; otherwise the destination is clobbered.
    const auto array_object =
        current_state->get_abstract_obj(insn->src(0)).get_object();
    if (array_object) {
      auto type = array_object->dex_type;
      if (type && type::is_array(type)) {
        const auto etype = type::get_array_component_type(type);
        update_non_string_input(current_state, insn, etype);
        break;
      }
    }
    default_semantics(insn, current_state);
    break;
  }
  case OPCODE_APUT_OBJECT: {
    // insn format: aput <source> <array> <offset>
    const auto source_object =
        current_state->get_abstract_obj(insn->src(0)).get_object();
    const auto array_object =
        current_state->get_abstract_obj(insn->src(1)).get_object();
    const auto offset_object =
        current_state->get_abstract_obj(insn->src(2)).get_object();
    // Storing a Class object at a known, in-bounds index of a tracked
    // Class[] array updates the recorded contents of that array.
    if (source_object && source_object->obj_kind == CLASS && array_object &&
        array_object->is_known_class_array() && offset_object &&
        offset_object->obj_kind == INT) {
      auto type = source_object->dex_type;
      boost::optional<int64_t> offset = offset_object->dex_int;
      boost::optional<std::vector<DexType*>> class_array =
          current_state->get_heap_class_array(array_object->heap_address)
              .get_constant();
      if (offset && class_array && *offset >= 0 &&
          class_array->size() > static_cast<size_t>(*offset)) {
        (*class_array)[*offset] = type;
        current_state->set_heap_class_array(
            array_object->heap_address,
            ConstantAbstractDomain<std::vector<DexType*>>(*class_array));
      }
    }
    // A tracked array that is itself stored into another array escapes, so
    // its contents can no longer be trusted.
    if (source_object && source_object->is_known_class_array()) {
      current_state->set_heap_addr_to_top(source_object->heap_address);
    }
    default_semantics(insn, current_state);
    break;
  }
  case OPCODE_IPUT_OBJECT:
  case OPCODE_SPUT_OBJECT: {
    // A tracked array stored into a field escapes; forget its contents.
    const auto source_object =
        current_state->get_abstract_obj(insn->src(0)).get_object();
    if (source_object && source_object->is_known_class_array()) {
      current_state->set_heap_addr_to_top(source_object->heap_address);
    }
    break;
  }
  case OPCODE_IGET_OBJECT:
  case OPCODE_SGET_OBJECT: {
    always_assert(insn->has_field());
    const auto field = insn->get_field();
    DexType* primitive_type = check_primitive_type_class(field);
    if (primitive_type) {
      // The field being accessed is a Class object to a primitive type
      // likely being used for reflection
      auto aobj = AbstractObject(AbstractObjectKind::CLASS, primitive_type);
      current_state->set_abstract_obj(RESULT_REGISTER,
                                      AbstractObjectDomain(aobj));
      current_state->set_class_source(
          RESULT_REGISTER,
          ClassObjectSourceDomain(ClassObjectSource::REFLECTION));
    } else {
      update_non_string_input(current_state, insn, field->get_type());
    }
    break;
  }
  case OPCODE_NEW_INSTANCE: {
    current_state->set_abstract_obj(
        RESULT_REGISTER,
        AbstractObjectDomain(
            AbstractObject(AbstractObjectKind::OBJECT, insn->get_type())));
    break;
  }
  case OPCODE_NEW_ARRAY: {
    auto array_type = insn->get_type();
    always_assert(type::is_array(array_type));
    auto component_type = type::get_array_component_type(array_type);
    if (component_type == type::java_lang_Class()) {
      const auto aobj =
          current_state->get_abstract_obj(insn->src(0)).get_object();
      // Only track the array when its length is a known, non-negative
      // constant. A negative length would otherwise be converted to an
      // enormous unsigned size when the vector is constructed.
      if (aobj && aobj->obj_kind == INT && aobj->dex_int &&
          *(aobj->dex_int) >= 0) {
        AbstractHeapAddress addr = allocate_heap_address();
        const auto size = static_cast<size_t>(*(aobj->dex_int));
        std::vector<DexType*> array(size);
        ConstantAbstractDomain<std::vector<DexType*>> heap_array(array);
        current_state->set_heap_class_array(addr, heap_array);
        current_state->set_abstract_obj(
            RESULT_REGISTER,
            AbstractObjectDomain(
                AbstractObject(AbstractObjectKind::OBJECT, addr)));
        break;
      }
    }
    current_state->set_abstract_obj(
        RESULT_REGISTER,
        AbstractObjectDomain(
            AbstractObject(AbstractObjectKind::OBJECT, insn->get_type())));
    break;
  }
  case OPCODE_FILLED_NEW_ARRAY: {
    auto array_type = insn->get_type();
    always_assert(type::is_array(array_type));
    auto component_type = type::get_array_component_type(array_type);
    AbstractObject aobj(AbstractObjectKind::OBJECT, insn->get_type());
    if (component_type == type::java_lang_Class()) {
      auto arg_count = insn->srcs_size();
      std::vector<DexType*> known_types;
      known_types.reserve(arg_count);
      // collect known types from the filled new array
      for (auto src_reg : insn->srcs()) {
        auto reg_obj = current_state->get_abstract_obj(src_reg).get_object();
        if (reg_obj && reg_obj->obj_kind == CLASS && reg_obj->dex_type) {
          known_types.push_back(reg_obj->dex_type);
        }
      }
      // The array is tracked only if every element is a known Class.
      if (known_types.size() == arg_count) {
        AbstractHeapAddress addr = allocate_heap_address();
        ConstantAbstractDomain<std::vector<DexType*>> heap_array(known_types);
        current_state->set_heap_class_array(addr, heap_array);
        aobj = AbstractObject(AbstractObjectKind::OBJECT, addr);
      }
    }
    current_state->set_abstract_obj(RESULT_REGISTER,
                                    AbstractObjectDomain(aobj));
    break;
  }
  case OPCODE_INVOKE_VIRTUAL: {
    auto receiver =
        current_state->get_abstract_obj(insn->src(0)).get_object();
    if (!receiver) {
      update_return_object_and_invalidate_heap_args(current_state, insn,
                                                    callee_return);
      break;
    }
    process_virtual_call(insn, *receiver, current_state, callee_return);
    break;
  }
  case OPCODE_INVOKE_STATIC: {
    if (insn->get_method() == m_cache->for_name) {
      auto class_name =
          current_state->get_abstract_obj(insn->src(0)).get_object();
      if (class_name && class_name->obj_kind == STRING) {
        if (class_name->dex_string != nullptr) {
          // Known class name: translate it to an internal type name.
          auto internal_name =
              DexString::make_string(java_names::external_to_internal(
                  class_name->dex_string->str()));
          current_state->set_abstract_obj(
              RESULT_REGISTER,
              AbstractObjectDomain(
                  AbstractObject(AbstractObjectKind::CLASS,
                                 DexType::make_type(internal_name))));
        } else {
          // Unknown class name: a Class object of unknown type.
          current_state->set_abstract_obj(
              RESULT_REGISTER,
              AbstractObjectDomain(
                  AbstractObject(AbstractObjectKind::CLASS, nullptr)));
        }
        current_state->set_class_source(
            RESULT_REGISTER,
            ClassObjectSourceDomain(ClassObjectSource::REFLECTION));
        break;
      }
    }
    update_return_object_and_invalidate_heap_args(current_state, insn,
                                                  callee_return);
    break;
  }
  case OPCODE_INVOKE_INTERFACE:
  case OPCODE_INVOKE_SUPER:
  case OPCODE_INVOKE_DIRECT: {
    update_return_object_and_invalidate_heap_args(current_state, insn,
                                                  callee_return);
    break;
  }
  case OPCODE_RETURN_OBJECT: {
    // Accumulate the abstract value of every returned object.
    this->m_return_value.join_with(
        current_state->get_abstract_obj(insn->src(0)));
    break;
  }
  default: {
    default_semantics(insn, current_state);
  }
  }
}
// Abstract object held in `reg` immediately before `insn` executes, or none
// when no state was cached for `insn` or the register's value is not a
// single abstract object.
boost::optional<AbstractObject> get_abstract_object(
    size_t reg, IRInstruction* insn) const {
  const auto cached = m_environments.find(insn);
  if (cached != m_environments.end()) {
    return cached->second.get_abstract_obj(reg).get_object();
  }
  return boost::none;
}
// Source of the Class object held in `reg` immediately before `insn`
// executes, or none when no state was cached for `insn` or the source is not
// a single constant.
boost::optional<ClassObjectSource> get_class_source(
    size_t reg, IRInstruction* insn) const {
  const auto cached = m_environments.find(insn);
  if (cached != m_environments.end()) {
    return cached->second.get_class_source(reg).get_constant();
  }
  return boost::none;
}
// Join of the abstract values seen at the return-object instructions.
AbstractObjectDomain get_return_value() const {
  return m_return_value;
}

// Abstract state at the exit block of the control-flow graph.
AbstractObjectEnvironment get_exit_state() const {
  const auto exit_block = m_cfg.exit_block();
  return get_exit_state_at(exit_block);
}
private:
// Method under analysis; its prototype, class and static-ness seed the
// initial state in run().
const DexMethod* m_dex_method;
// Control-flow graph the analysis iterates over.
const cfg::ControlFlowGraph& m_cfg;
// Abstract state in front of each instruction, filled in by
// populate_environments() once the fixpoint has been reached.
std::unordered_map<IRInstruction*, AbstractObjectEnvironment> m_environments;
// Join of all returned objects. Mutable because it is updated from the const
// analyze_instruction().
mutable AbstractObjectDomain m_return_value;
// Optional callback queried for a callee's abstract return value; may be
// null.
SummaryQueryFn* m_summary_query_fn;
// Lookup of the reflection-related methods and fields the analysis
// recognizes.
const MetadataCache* m_cache;
// Binds the register written by `insn` (RESULT_REGISTER when the instruction
// is followed by a move-result, otherwise its destination) to an abstract
// object derived only from the static `type`: a CLASS of unknown type with a
// NON_REFLECTION source for java.lang.Class, an OBJECT of that type
// otherwise.
void update_non_string_input(AbstractObjectEnvironment* current_state,
                             const IRInstruction* insn,
                             DexType* type) const {
  auto target_reg =
      insn->has_move_result_any() ? RESULT_REGISTER : insn->dest();
  if (type != type::java_lang_Class()) {
    current_state->set_abstract_obj(
        target_reg,
        AbstractObjectDomain(
            AbstractObject(AbstractObjectKind::OBJECT, type)));
    return;
  }
  // We don't have precise type information about the type the Class object
  // refers to.
  current_state->set_abstract_obj(
      target_reg,
      AbstractObjectDomain(
          AbstractObject(AbstractObjectKind::CLASS, nullptr)));
  current_state->set_class_source(
      target_reg, ClassObjectSourceDomain(ClassObjectSource::NON_REFLECTION));
}
// Looks `field` up in the cache's primitive-field table and returns the
// mapped type, or nullptr when the field is not in the table.
DexType* check_primitive_type_class(const DexFieldRef* field) const {
  auto hit = m_cache->primitive_field_to_type.find(field);
  if (hit == m_cache->primitive_field_to_type.end()) {
    return nullptr;
  }
  return hit->second;
}
// Generic handling of an invoke: forgets the contents of any tracked array
// passed as an argument, then, if the callee returns an object, binds
// RESULT_REGISTER to the callee summary when one is available or to an
// object derived from the declared return type otherwise.
void update_return_object_and_invalidate_heap_args(
    AbstractObjectEnvironment* current_state,
    const IRInstruction* insn,
    const AbstractObjectDomain& callee_return) const {
  invalidate_argument_heap_objects(current_state, insn);
  DexMethodRef* callee = insn->get_method();
  DexType* return_type = callee->get_proto()->get_rtype();
  const bool returns_object =
      !type::is_void(return_type) && type::is_object(return_type);
  if (!returns_object) {
    return;
  }
  if (!callee_return.is_value()) {
    update_non_string_input(current_state, insn, return_type);
    return;
  }
  current_state->set_abstract_obj(RESULT_REGISTER, callee_return);
}
// Fallback for instructions the analysis does not model: whatever the
// instruction writes becomes unknown (Top).
void default_semantics(const IRInstruction* insn,
                       AbstractObjectEnvironment* current_state) const {
  // Clobbering the destination also covers the MOVE_RESULT_* and
  // MOVE_RESULT_PSEUDO_* instructions that follow operations not considered
  // by this analysis, so their effect is abstracted away regardless of the
  // width of the destination register.
  if (insn->has_dest()) {
    const auto dest = insn->dest();
    current_state->set_abstract_obj(dest, AbstractObjectDomain::top());
    if (insn->dest_is_wide()) {
      // A wide value also occupies the next register.
      current_state->set_abstract_obj(dest + 1, AbstractObjectDomain::top());
    }
  }
  // An instruction followed by a move-result writes RESULT_REGISTER, which
  // therefore has to be invalidated as well.
  if (!insn->has_move_result_any()) {
    return;
  }
  current_state->set_abstract_obj(RESULT_REGISTER,
                                  AbstractObjectDomain::top());
}
// Returns the string held by a source operand of `insn`, or nullptr when that
// operand is not a STRING object (or is a STRING of unknown value). Despite
// its type, `reg` is used as the index of the source operand, not as a
// register number.
const DexString* get_dex_string_from_insn(
    AbstractObjectEnvironment* current_state,
    const IRInstruction* insn,
    reg_t reg) const {
  auto operand = current_state->get_abstract_obj(insn->src(reg)).get_object();
  const bool is_string = operand && operand->obj_kind == STRING;
  if (!is_string) {
    return nullptr;
  }
  return operand->dex_string;
}
// Returns true when `method` is one of the cached reflection lookups
// (get_method / get_declared_method) that invalidate_argument_heap_objects()
// exempts from invalidating its array arguments.
//
// The callee is compared by pointer, the same way process_virtual_call()
// matches against the cache, rather than building a std::set on every invoke.
// NOTE(review): this assumes method references are unique per
// (class, name, proto), so that pointer equality agrees with the
// dexmethods_comparator lookup it replaces - confirm.
bool is_method_known_to_preserve_args(DexMethodRef* method) const {
  return method == m_cache->get_method ||
         method == m_cache->get_declared_method;
}
// Forgets the recorded contents of every heap-tracked object passed as an
// argument to the call `insn`, unless the callee is known not to modify its
// arguments.
void invalidate_argument_heap_objects(
    AbstractObjectEnvironment* current_state,
    const IRInstruction* insn) const {
  const bool may_modify_args =
      insn->has_method() &&
      !is_method_known_to_preserve_args(insn->get_method());
  if (!may_modify_args) {
    return;
  }
  for (const auto reg : insn->srcs()) {
    auto arg = current_state->get_abstract_obj(reg).get_object();
    // Only arguments with a non-zero heap address are tracked on the heap.
    if (arg && arg->heap_address) {
      current_state->set_heap_addr_to_top(arg->heap_address);
    }
  }
}
void process_virtual_call(const IRInstruction* insn,
const AbstractObject& receiver,
AbstractObjectEnvironment* current_state,
const AbstractObjectDomain& callee_return) const {
DexMethodRef* callee = insn->get_method();
switch (receiver.obj_kind) {
case INT: {
// calling on int, not valid
break;
}
case OBJECT: {
if (callee == m_cache->get_class) {
current_state->set_abstract_obj(
RESULT_REGISTER,
AbstractObjectDomain(AbstractObject(AbstractObjectKind::CLASS,
receiver.dex_type,
receiver.potential_dex_types)));
current_state->set_class_source(
RESULT_REGISTER,
ClassObjectSourceDomain(ClassObjectSource::REFLECTION));
return;
}
break;
}
case STRING: {
if (callee == m_cache->get_class) {
current_state->set_abstract_obj(
RESULT_REGISTER,
AbstractObjectDomain(AbstractObject(AbstractObjectKind::CLASS,
type::java_lang_String())));
current_state->set_class_source(
RESULT_REGISTER,
ClassObjectSourceDomain(ClassObjectSource::REFLECTION));
return;
}
break;
}
case CLASS: {
AbstractObjectKind element_kind;
const DexString* element_name = nullptr;
boost::optional<std::vector<DexType*>> method_param_types = boost::none;
if (callee == m_cache->get_method ||
callee == m_cache->get_declared_method) {
element_kind = METHOD;
element_name = get_dex_string_from_insn(current_state, insn, 1);
auto arr_reg = insn->src(2); // holds java.lang.Class array
auto arr_obj = current_state->get_abstract_obj(arr_reg).get_object();
if (arr_obj && arr_obj->is_known_class_array()) {
auto maybe_array =
current_state->get_heap_class_array(arr_obj->heap_address)
.get_constant();
if (maybe_array) {
method_param_types = *maybe_array;
}
}
} else if (callee == m_cache->get_constructor ||
callee == m_cache->get_declared_constructor) {
element_kind = METHOD;
element_name = DexString::get_string("<init>");
auto arr_reg = insn->src(1);
auto arr_obj = current_state->get_abstract_obj(arr_reg).get_object();
if (arr_obj && arr_obj->is_known_class_array()) {
auto maybe_array =
current_state->get_heap_class_array(arr_obj->heap_address)
.get_constant();
if (maybe_array) {
method_param_types = *maybe_array;
}
}
} else if (callee == m_cache->get_field ||
callee == m_cache->get_declared_field) {
element_kind = FIELD;
element_name = get_dex_string_from_insn(current_state, insn, 1);
} else if (callee == m_cache->get_fields ||
callee == m_cache->get_declared_fields) {
element_kind = FIELD;
element_name = DexString::get_string("");
} else if (callee == m_cache->get_methods ||
callee == m_cache->get_declared_methods) {
element_kind = METHOD;
element_name = DexString::get_string("");
} else if (callee == m_cache->get_constructors ||
callee == m_cache->get_declared_constructors) {
element_kind = METHOD;
element_name = DexString::get_string("<init>");
}
if (element_name == nullptr) {
break;
}
AbstractObject aobj(element_kind,
receiver.dex_type,
element_name,
receiver.potential_dex_types);
if (method_param_types) {
aobj.dex_type_array = method_param_types;
}
current_state->set_abstract_obj(RESULT_REGISTER,
AbstractObjectDomain(aobj));
return;
}
case FIELD:
case METHOD: {
if ((receiver.obj_kind == FIELD && callee == m_cache->get_field_name) ||
(receiver.obj_kind == METHOD && callee == m_cache->get_method_name)) {
current_state->set_abstract_obj(
RESULT_REGISTER,
AbstractObjectDomain(AbstractObject(receiver.dex_string)));
return;
}
break;
}
}
update_return_object_and_invalidate_heap_args(current_state, insn,
callee_return);
}
// After the fixpoint iteration completes, we replay the analysis on all
// blocks and we cache the abstract state at each instruction. This cache is
// used by get_abstract_object() to query the state of a register at a given
// instruction. Since we use an abstract domain based on Patricia trees, the
// memory footprint of storing the abstract state at each program point is
// small.
//
// The state stored for an instruction is the one in effect before that
// instruction is applied.
void populate_environments(const cfg::ControlFlowGraph& cfg) {
  // We reserve enough space for the map in order to avoid repeated
  // rehashing during the computation.
  m_environments.reserve(cfg.blocks().size() * 16);
  for (cfg::Block* block : cfg.blocks()) {
    AbstractObjectEnvironment current_state = get_entry_state_at(block);
    for (auto& mie : InstructionIterable(block)) {
      IRInstruction* insn = mie.insn;
      m_environments.emplace(insn, current_state);
      // Advance the state past `insn` for the next instruction in the block.
      analyze_instruction(insn, &current_state);
    }
  }
}
};
} // namespace impl
// Releases the metadata cache created by the constructor when the caller did
// not supply one.
ReflectionAnalysis::~ReflectionAnalysis() {
  // Deleting a null pointer is a no-op, so no explicit check is needed.
  delete m_fallback_cache;
  m_fallback_cache = nullptr;
}
// Builds the (non-editable) control-flow graph of `dex_method` and runs the
// reflection analysis on it. A method without code is left unanalyzed.
// `context`, `summary_query_fn` and `cache` may all be null; when `cache` is
// null a private MetadataCache is created and owned by this object.
ReflectionAnalysis::ReflectionAnalysis(DexMethod* dex_method,
                                       CallingContext* context,
                                       SummaryQueryFn* summary_query_fn,
                                       const MetadataCache* cache)
    : m_dex_method(dex_method) {
  always_assert(dex_method != nullptr);
  IRCode* const code = dex_method->get_code();
  if (code == nullptr) {
    return;
  }
  code->build_cfg(/* editable */ false);
  cfg::ControlFlowGraph& cfg = code->cfg();
  cfg.calculate_exit_block();
  const MetadataCache* effective_cache = cache;
  if (effective_cache == nullptr) {
    m_fallback_cache = new MetadataCache;
    effective_cache = m_fallback_cache;
  }
  m_analyzer = std::make_unique<impl::Analyzer>(
      dex_method, cfg, summary_query_fn, effective_cache);
  m_analyzer->run(context);
}
// Adds an entry for `reg` to `abstract_objects` if, before `insn`, the
// register holds a reflection result: a FIELD, a METHOD, or a CLASS whose
// source is not NON_REFLECTION. Nothing is added otherwise.
void ReflectionAnalysis::get_reflection_site(
    const reg_t reg,
    IRInstruction* insn,
    std::map<reg_t, ReflectionAbstractObject>* abstract_objects) const {
  auto aobj = m_analyzer->get_abstract_object(reg, insn);
  if (!aobj || is_not_reflection_output(*aobj)) {
    return;
  }
  const bool is_class = aobj->obj_kind == AbstractObjectKind::CLASS;
  boost::optional<ClassObjectSource> cls_src;
  if (is_class) {
    cls_src = m_analyzer->get_class_source(reg, insn);
  }
  if (is_class && cls_src == ClassObjectSource::NON_REFLECTION) {
    // Class objects that did not come from reflection are not reported.
    return;
  }
  if (traceEnabled(REFL, 5)) {
    std::ostringstream out;
    out << "reg " << reg << " " << *aobj << " ";
    if (cls_src) {
      out << *cls_src;
    }
    out << '\n';
    TRACE(REFL, 5, " reflection site: %s", out.str().c_str());
  }
  (*abstract_objects)[reg] = ReflectionAbstractObject(*aobj, cls_src);
}
// Collects, for every instruction of the method, the registers (including
// RESULT_REGISTER) that hold a reflection result before that instruction.
// Instructions with no such register are omitted. Returns an empty list for a
// method without code.
ReflectionSites ReflectionAnalysis::get_reflection_sites() const {
  ReflectionSites result;
  auto code = m_dex_method->get_code();
  if (code == nullptr) {
    return result;
  }
  auto reg_size = code->get_registers_size();
  for (auto& mie : InstructionIterable(code)) {
    IRInstruction* insn = mie.insn;
    std::map<reg_t, ReflectionAbstractObject> found;
    for (size_t reg = 0; reg < reg_size; reg++) {
      get_reflection_site(reg, insn, &found);
    }
    get_reflection_site(RESULT_REGISTER, insn, &found);
    if (found.empty()) {
      continue;
    }
    result.push_back(std::make_pair(insn, found));
  }
  return result;
}
// Abstract return value computed by the analysis, or Top when the method was
// not analyzed.
AbstractObjectDomain ReflectionAnalysis::get_return_value() const {
  if (m_analyzer) {
    return m_analyzer->get_return_value();
  }
  // Method has no code, or is a native method.
  return AbstractObjectDomain::top();
}
// Returns the parameter types recorded for the reflective METHOD object
// produced by `invoke_insn`, if known.
//
// The result is read from RESULT_REGISTER at the move-result instruction that
// directly follows `invoke_insn`. Returns none when the method has no code,
// `invoke_insn` is not found, it is not followed by a move-result, the result
// is not a METHOD object, or the parameter types are unknown.
boost::optional<std::vector<DexType*>> ReflectionAnalysis::get_method_params(
    IRInstruction* invoke_insn) const {
  auto code = m_dex_method->get_code();
  if (code == nullptr) {
    // Nothing to iterate over (and nothing was analyzed).
    return boost::none;
  }
  IRInstruction* move_result_insn = nullptr;
  auto ii = InstructionIterable(code);
  for (auto it = ii.begin(); it != ii.end(); ++it) {
    if (it->insn != invoke_insn) {
      continue;
    }
    // The invoke may be the last instruction; do not dereference end().
    auto next_it = std::next(it);
    if (next_it != ii.end()) {
      move_result_insn = next_it->insn;
    }
    break;
  }
  if (!move_result_insn ||
      !opcode::is_a_move_result(move_result_insn->opcode())) {
    return boost::none;
  }
  auto arg_param = get_abstract_object(RESULT_REGISTER, move_result_insn);
  if (!arg_param ||
      arg_param->obj_kind != reflection::AbstractObjectKind::METHOD) {
    return boost::none;
  }
  return arg_param->dex_type_array;
}
bool ReflectionAnalysis::has_found_reflection() const {
return !get_reflection_sites().empty();
}
// Abstract object held in `reg` before `insn`; none when the method was not
// analyzed or nothing is known.
boost::optional<AbstractObject> ReflectionAnalysis::get_abstract_object(
    size_t reg, IRInstruction* insn) const {
  if (m_analyzer != nullptr) {
    return m_analyzer->get_abstract_object(reg, insn);
  }
  return boost::none;
}
// Source of the Class object held in `reg` before `insn`; none when the
// method was not analyzed or nothing is known.
boost::optional<ClassObjectSource> ReflectionAnalysis::get_class_source(
    size_t reg, IRInstruction* insn) const {
  if (m_analyzer != nullptr) {
    return m_analyzer->get_class_source(reg, insn);
  }
  return boost::none;
}
// Calling contexts recorded for the invoke instructions, taken from the
// abstract state at the method's exit; Top when the method was not analyzed.
CallingContextMap ReflectionAnalysis::get_calling_context_partition() const {
  if (m_analyzer != nullptr) {
    const auto exit_state = this->m_analyzer->get_exit_state();
    return exit_state.get_calling_context_partition();
  }
  return CallingContextMap::top();
}
} // namespace reflection