libredex/InstructionAnalyzer.h (348 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <functional>
#include <utility>
#include "IRInstruction.h"
#include "TemplateUtil.h"
/*
* This module provides a way to compose analyses over IRInstructions.
*
* Implementors should define sub-analyzers that inherit from
* InstructionAnalyzerBase. These sub-analyzers can then be composed using
* the InstructionAnalyzerCombiner.
*/
/* clang-format off */
// Opcodes are grouped on the basis that most analyses will want to handle all
// opcodes in a given group similarly.
//
// OPCODE_GROUPS is an X-macro list: it expands to one X(<group>) invocation
// per opcode group. A user defines X(...) right before expanding
// OPCODE_GROUPS and #undef's it right after. This header uses that to stamp
// out one analyze_<group> / analyze_wrapper_<group> method per group in the
// analyzer classes below. OPCODE_GROUPS itself is #undef'ed further down in
// this header (after InstructionAnalyzerCombiner).
#define OPCODE_GROUPS \
X(load_param) \
X(nop) \
X(move) \
X(move_result) \
X(move_exception) \
X(return) \
X(monitor) \
X(const) \
X(const_string) \
X(const_class) \
X(check_cast) \
X(instance_of) \
X(array_length) \
X(new_instance) \
X(new_array) \
X(filled_new_array) \
X(fill_array_data) \
X(throw) \
X(goto) \
X(switch) \
X(cmp) \
X(if) \
X(aget) \
X(aput) \
X(iget) \
X(iput) \
X(sget) \
X(sput) \
X(invoke) \
X(unop) \
X(binop) \
X(binop_lit) \
X(init_class)
/* clang-format on */
/*
 * A sub-analyzer is simply a description of how to mutate an Environment given
 * an IRInstruction.
 *
 * Sub-analyzers should implement the analyze_* methods for the opcode groups
 * they are interested in. These methods should return `false` if they want
 * subsequent sub-analyzers to run, and `true` if the analysis for the given
 * instruction should terminate. In general, a sub-analysis should return
 * `true` if it believes subsequent analyses will not be able to further refine
 * the environment.
 *
 * All sub-analyzers should inherit from this class, which uses CRTP to mimic
 * virtual dispatch on static methods. It provides a default implementation for
 * all opcode group analyses, which is simply a dispatch to the analyze_default
 * method in the derived class.
 *
 * We use CRTP because we want each sub-analyzer to only comprise static
 * methods. That ensures that the compiler can inline and elide as many calls
 * as possible. This is important because most sub-analyzers will only define a
 * small number of nontrivial instruction analyses.
 *
 * Limiting ourselves to static methods means that we have no `this` object
 * to store state. State is instead passed as an explicit argument to each
 * method.
 *
 * Template parameters:
 *   Derived - the concrete sub-analyzer (CRTP).
 *   EnvT    - the environment type mutated by the analysis; exposed as `Env`.
 *   StateT  - the state type handed to every analyze_* method; exposed as
 *             `State`. When it is std::nullptr_t (the default), the partial
 *             specialization of this template is selected instead of this
 *             primary template.
 *
 * Note: the template parameters deliberately avoid names of the form
 * `_Uppercase`, which are reserved for the implementation.
 */
template <typename Derived, typename EnvT, typename StateT = std::nullptr_t>
class InstructionAnalyzerBase {
 public:
  using State = StateT;
  using Env = EnvT;

  // Fallback used by every analyze_<group> that Derived does not override.
  // Returning false lets subsequent sub-analyzers run. Derived may hide this
  // with its own analyze_default to change the fallback for all groups.
  static bool analyze_default(const State& /* state */,
                              const IRInstruction* /* insn */,
                              Env* /* env */) {
    return false;
  }

  // For every opcode group this generates:
  //  - analyze_wrapper_<group>: forwards to Derived::analyze_<group>, so the
  //    derived class's definition (if any) is the one that runs;
  //  - analyze_<group>: the default, which forwards to
  //    Derived::analyze_default.
#define X(opcode_group)                                          \
  static bool analyze_wrapper_##opcode_group(                    \
      const State& state, const IRInstruction* insn, Env* env) { \
    return Derived::analyze_##opcode_group(state, insn, env);    \
  }                                                              \
  static bool analyze_##opcode_group(                            \
      const State& state, const IRInstruction* insn, Env* env) { \
    return Derived::analyze_default(state, insn, env);           \
  }
  OPCODE_GROUPS
#undef X
};
/*
 * Some sub-analyzers have no need for state. This partial template
 * specialization makes them easier to write -- instead of passing an unused
 * nullptr around, the analyze_* methods can omit the state parameter entirely.
 *
 * Only the analyze_wrapper_* methods still accept (and ignore) a
 * std::nullptr_t as their first argument.
 *
 * Note: the template parameter is named EnvT rather than `_Env` because
 * identifiers of the form `_Uppercase` are reserved for the implementation.
 */
template <typename Derived, typename EnvT>
class InstructionAnalyzerBase<Derived, EnvT, std::nullptr_t> {
 public:
  using State = std::nullptr_t;
  using Env = EnvT;

  // Fallback used by every analyze_<group> that Derived does not override.
  // Returning false lets subsequent sub-analyzers run.
  static bool analyze_default(const IRInstruction* /* insn */,
                              Env* /* env */) {
    return false;
  }

  // Note that defining a static method in a subclass hides all methods in the
  // superclass of the same name, *regardless of signature*. As such, we
  // define analyze_wrapper_##opcode_group here instead of overloading the
  // signature of analyze_##opcode_group so that the analyze_wrapper methods
  // don't get hidden when the analyzer implementor defines its analyze_*
  // methods.
#define X(opcode_group)                                                     \
  static bool analyze_wrapper_##opcode_group(                               \
      std::nullptr_t, const IRInstruction* insn, Env* env) {                \
    return Derived::analyze_##opcode_group(insn, env);                      \
  }                                                                         \
  static bool analyze_##opcode_group(const IRInstruction* insn, Env* env) { \
    return Derived::analyze_default(insn, env);                             \
  }
  OPCODE_GROUPS
#undef X
};
/*
 * Composes several sub-analyzers into a single callable.
 *
 * operator() maps the instruction's opcode to its opcode group and then runs
 * each sub-analyzer in the Analyzers list from left to right on the given
 * instruction, handing each one its own entry of the stored state tuple. As
 * soon as one sub-analyzer returns `true`, the remaining ones are skipped for
 * that instruction.
 */
template <typename... Analyzers>
class InstructionAnalyzerCombiner final {
 public:
  // All Analyzers should have the same Env type.
  using Env = typename std::common_type<typename Analyzers::Env...>::type;

  // The inheritance check lives in a member function body rather than at
  // class scope.
  // NOTE(review): presumably so that it is only evaluated once the Analyzers
  // are complete types -- confirm before moving it.
  ~InstructionAnalyzerCombiner() {
    static_assert(
        template_util::all_true<(
            std::is_base_of<InstructionAnalyzerBase<Analyzers,
                                                    typename Analyzers::Env,
                                                    typename Analyzers::State>,
                            Analyzers>::value)...>::value,
        "Not all analyses inherit from the right instance of "
        "InstructionAnalyzerBase!");
  }

  // Takes one state per sub-analyzer, in the same order as Analyzers. The
  // states are sink parameters: they are taken by value and moved into the
  // tuple, so no additional copy is made after the call site.
  explicit InstructionAnalyzerCombiner(typename Analyzers::State... states)
      : m_states(std::move(states)...) {}

  // If all sub-analyzers have a default-constructible state, then this
  // combined analyzer is default-constructible. Every state is
  // value-initialized.
  template <bool B = template_util::all_true<
                (std::is_default_constructible<
                    typename Analyzers::State>::value)...>::value,
            typename = typename std::enable_if_t<B>>
  InstructionAnalyzerCombiner() : m_states() {}

  // Dispatches `insn` to the analyze_<group> helper matching its opcode. The
  // helper then runs the sub-analyzers, which may mutate `*env`.
  void operator()(const IRInstruction* insn, Env* env) const {
    // One index per sub-analyzer; used by the helpers to pick the matching
    // element of m_states.
    using Indices = std::index_sequence_for<Analyzers...>;

    switch (insn->opcode()) {
    case IOPCODE_LOAD_PARAM:
    case IOPCODE_LOAD_PARAM_OBJECT:
    case IOPCODE_LOAD_PARAM_WIDE:
      return analyze_load_param(Indices{}, insn, env);

    case OPCODE_NOP:
      return analyze_nop(Indices{}, insn, env);

    case OPCODE_MOVE:
    case OPCODE_MOVE_WIDE:
    case OPCODE_MOVE_OBJECT:
      return analyze_move(Indices{}, insn, env);

    case OPCODE_MOVE_RESULT:
    case OPCODE_MOVE_RESULT_WIDE:
    case OPCODE_MOVE_RESULT_OBJECT:
    case IOPCODE_MOVE_RESULT_PSEUDO:
    case IOPCODE_MOVE_RESULT_PSEUDO_OBJECT:
    case IOPCODE_MOVE_RESULT_PSEUDO_WIDE:
      return analyze_move_result(Indices{}, insn, env);

    case OPCODE_MOVE_EXCEPTION:
      return analyze_move_exception(Indices{}, insn, env);

    case OPCODE_RETURN_VOID:
    case OPCODE_RETURN:
    case OPCODE_RETURN_WIDE:
    case OPCODE_RETURN_OBJECT:
      return analyze_return(Indices{}, insn, env);

    case OPCODE_MONITOR_ENTER:
    case OPCODE_MONITOR_EXIT:
      return analyze_monitor(Indices{}, insn, env);

    case OPCODE_THROW:
      return analyze_throw(Indices{}, insn, env);

    case OPCODE_GOTO:
      return analyze_goto(Indices{}, insn, env);

    case OPCODE_NEG_INT:
    case OPCODE_NOT_INT:
    case OPCODE_NEG_LONG:
    case OPCODE_NOT_LONG:
    case OPCODE_NEG_FLOAT:
    case OPCODE_NEG_DOUBLE:
    case OPCODE_INT_TO_LONG:
    case OPCODE_INT_TO_FLOAT:
    case OPCODE_INT_TO_DOUBLE:
    case OPCODE_LONG_TO_INT:
    case OPCODE_LONG_TO_FLOAT:
    case OPCODE_LONG_TO_DOUBLE:
    case OPCODE_FLOAT_TO_INT:
    case OPCODE_FLOAT_TO_LONG:
    case OPCODE_FLOAT_TO_DOUBLE:
    case OPCODE_DOUBLE_TO_INT:
    case OPCODE_DOUBLE_TO_LONG:
    case OPCODE_DOUBLE_TO_FLOAT:
    case OPCODE_INT_TO_BYTE:
    case OPCODE_INT_TO_CHAR:
    case OPCODE_INT_TO_SHORT:
      return analyze_unop(Indices{}, insn, env);

    case OPCODE_ARRAY_LENGTH:
      return analyze_array_length(Indices{}, insn, env);

    case OPCODE_CMPL_FLOAT:
    case OPCODE_CMPG_FLOAT:
    case OPCODE_CMPL_DOUBLE:
    case OPCODE_CMPG_DOUBLE:
    case OPCODE_CMP_LONG:
      return analyze_cmp(Indices{}, insn, env);

    case OPCODE_IF_EQ:
    case OPCODE_IF_NE:
    case OPCODE_IF_LT:
    case OPCODE_IF_GE:
    case OPCODE_IF_GT:
    case OPCODE_IF_LE:
    case OPCODE_IF_EQZ:
    case OPCODE_IF_NEZ:
    case OPCODE_IF_LTZ:
    case OPCODE_IF_GEZ:
    case OPCODE_IF_GTZ:
    case OPCODE_IF_LEZ:
      return analyze_if(Indices{}, insn, env);

    case OPCODE_AGET:
    case OPCODE_AGET_WIDE:
    case OPCODE_AGET_OBJECT:
    case OPCODE_AGET_BOOLEAN:
    case OPCODE_AGET_BYTE:
    case OPCODE_AGET_CHAR:
    case OPCODE_AGET_SHORT:
      return analyze_aget(Indices{}, insn, env);

    case OPCODE_APUT:
    case OPCODE_APUT_WIDE:
    case OPCODE_APUT_OBJECT:
    case OPCODE_APUT_BOOLEAN:
    case OPCODE_APUT_BYTE:
    case OPCODE_APUT_CHAR:
    case OPCODE_APUT_SHORT:
      return analyze_aput(Indices{}, insn, env);

    case OPCODE_ADD_INT:
    case OPCODE_SUB_INT:
    case OPCODE_MUL_INT:
    case OPCODE_DIV_INT:
    case OPCODE_REM_INT:
    case OPCODE_AND_INT:
    case OPCODE_OR_INT:
    case OPCODE_XOR_INT:
    case OPCODE_SHL_INT:
    case OPCODE_SHR_INT:
    case OPCODE_USHR_INT:
    case OPCODE_ADD_LONG:
    case OPCODE_SUB_LONG:
    case OPCODE_MUL_LONG:
    case OPCODE_DIV_LONG:
    case OPCODE_REM_LONG:
    case OPCODE_AND_LONG:
    case OPCODE_OR_LONG:
    case OPCODE_XOR_LONG:
    case OPCODE_SHL_LONG:
    case OPCODE_SHR_LONG:
    case OPCODE_USHR_LONG:
    case OPCODE_ADD_FLOAT:
    case OPCODE_SUB_FLOAT:
    case OPCODE_MUL_FLOAT:
    case OPCODE_DIV_FLOAT:
    case OPCODE_REM_FLOAT:
    case OPCODE_ADD_DOUBLE:
    case OPCODE_SUB_DOUBLE:
    case OPCODE_MUL_DOUBLE:
    case OPCODE_DIV_DOUBLE:
    case OPCODE_REM_DOUBLE:
      return analyze_binop(Indices{}, insn, env);

    case OPCODE_ADD_INT_LIT16:
    case OPCODE_RSUB_INT:
    case OPCODE_MUL_INT_LIT16:
    case OPCODE_DIV_INT_LIT16:
    case OPCODE_REM_INT_LIT16:
    case OPCODE_AND_INT_LIT16:
    case OPCODE_OR_INT_LIT16:
    case OPCODE_XOR_INT_LIT16:
    case OPCODE_ADD_INT_LIT8:
    case OPCODE_RSUB_INT_LIT8:
    case OPCODE_MUL_INT_LIT8:
    case OPCODE_DIV_INT_LIT8:
    case OPCODE_REM_INT_LIT8:
    case OPCODE_AND_INT_LIT8:
    case OPCODE_OR_INT_LIT8:
    case OPCODE_XOR_INT_LIT8:
    case OPCODE_SHL_INT_LIT8:
    case OPCODE_SHR_INT_LIT8:
    case OPCODE_USHR_INT_LIT8:
      return analyze_binop_lit(Indices{}, insn, env);

    case OPCODE_CONST:
    case OPCODE_CONST_WIDE:
      return analyze_const(Indices{}, insn, env);

    case OPCODE_CONST_STRING:
      return analyze_const_string(Indices{}, insn, env);

    case OPCODE_CONST_CLASS:
      return analyze_const_class(Indices{}, insn, env);

    case OPCODE_FILL_ARRAY_DATA:
      return analyze_fill_array_data(Indices{}, insn, env);

    case OPCODE_SWITCH:
      return analyze_switch(Indices{}, insn, env);

    case OPCODE_IGET:
    case OPCODE_IGET_WIDE:
    case OPCODE_IGET_OBJECT:
    case OPCODE_IGET_BOOLEAN:
    case OPCODE_IGET_BYTE:
    case OPCODE_IGET_CHAR:
    case OPCODE_IGET_SHORT:
      return analyze_iget(Indices{}, insn, env);

    case OPCODE_IPUT:
    case OPCODE_IPUT_WIDE:
    case OPCODE_IPUT_OBJECT:
    case OPCODE_IPUT_BOOLEAN:
    case OPCODE_IPUT_BYTE:
    case OPCODE_IPUT_CHAR:
    case OPCODE_IPUT_SHORT:
      return analyze_iput(Indices{}, insn, env);

    case OPCODE_SGET:
    case OPCODE_SGET_WIDE:
    case OPCODE_SGET_OBJECT:
    case OPCODE_SGET_BOOLEAN:
    case OPCODE_SGET_BYTE:
    case OPCODE_SGET_CHAR:
    case OPCODE_SGET_SHORT:
      return analyze_sget(Indices{}, insn, env);

    case OPCODE_SPUT:
    case OPCODE_SPUT_WIDE:
    case OPCODE_SPUT_OBJECT:
    case OPCODE_SPUT_BOOLEAN:
    case OPCODE_SPUT_BYTE:
    case OPCODE_SPUT_CHAR:
    case OPCODE_SPUT_SHORT:
      return analyze_sput(Indices{}, insn, env);

    case OPCODE_INVOKE_VIRTUAL:
    case OPCODE_INVOKE_SUPER:
    case OPCODE_INVOKE_DIRECT:
    case OPCODE_INVOKE_STATIC:
    case OPCODE_INVOKE_POLYMORPHIC:
    case OPCODE_INVOKE_CUSTOM:
    case OPCODE_INVOKE_INTERFACE:
      return analyze_invoke(Indices{}, insn, env);

    case OPCODE_CHECK_CAST:
      return analyze_check_cast(Indices{}, insn, env);

    case OPCODE_INSTANCE_OF:
      return analyze_instance_of(Indices{}, insn, env);

    case OPCODE_NEW_INSTANCE:
      return analyze_new_instance(Indices{}, insn, env);

    case OPCODE_NEW_ARRAY:
      return analyze_new_array(Indices{}, insn, env);

    case OPCODE_FILLED_NEW_ARRAY:
      return analyze_filled_new_array(Indices{}, insn, env);

    case IOPCODE_INIT_CLASS:
      return analyze_init_class(Indices{}, insn, env);
    }
  }

 private:
  // Fold expr over a parameter pack.
  // See http://articles.emptycrate.com/2016/05/14/folds_in_cpp11_ish.html for
  // details.
#define FOLD(expr) \
  std::initializer_list<int> { (expr, 0)... }

  // Run the sub-analyzers in order, passing them their associated state.
  // See
  // http://aherrmann.github.io/programming/2016/02/28/unpacking-tuples-in-cpp14/
  // for an explanation of how index_sequence is used to extract the correct
  // state from the m_states tuple.
  //
  // `run_next` starts out true and becomes false once a sub-analyzer returns
  // true; because of the short-circuiting `&&`, the sub-analyzers after that
  // point are not invoked.
#define X(opcode_group)                                                        \
  template <size_t... Is>                                                      \
  void analyze_##opcode_group(                                                 \
      std::index_sequence<Is...>, const IRInstruction* insn, Env* env) const { \
    bool run_next{true};                                                       \
    FOLD(run_next = run_next && !Analyzers::analyze_wrapper_##opcode_group(    \
             std::get<Is>(m_states), insn, env));                              \
  }
  OPCODE_GROUPS
#undef X
#undef FOLD

  // One state per sub-analyzer, stored in the same order as Analyzers.
  std::tuple<typename Analyzers::State...> m_states;
};
#undef OPCODE_GROUPS
/*
 * Type-erased form of an instruction analyzer for a given environment type.
 *
 * Any callable with the signature `void(const IRInstruction* insn, Env* env)`
 * can be stored in it; in particular, an instance of
 * InstructionAnalyzerCombiner can be type-erased this way. This alias template
 * gives such std::function types a convenient name.
 */
template <typename EnvT>
using InstructionAnalyzer = std::function<void(const IRInstruction*, EnvT*)>;