libredex/GraphVisualizer.cpp (585 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "GraphVisualizer.h"
#include <fstream>
#include <iostream>
#include <boost/algorithm/string.hpp>
#include <boost/range/adaptors.hpp>
#include "ControlFlow.h"
#include "Debug.h"
#include "DexPosition.h"
#include "IRCode.h"
#include "Show.h"
// The "Hotspot Client Compiler Visualizer" (c1visualizer) is a tool consuming
// Hotspot C1 compiler debug info to display control flow graphs of compilation
// passes. The format is not well-specified, the best definition is the parser
// itself, which can be found here:
//
// https://github.com/zakkak/c1visualizer/blob/master/Compilation%20Model/src/at/ssw/visualizer/parser/CompilerOutput.atg
//
// as well as accompanying code on how to parse some textual data.
//
// A cfg file contains a set of compilations which are denoted by a compilation
// header ("begin_compilation" to "end_compilation") and associated CFGs
// ("begin_cfg" to "end_cfg"). CFGs are made up of connected blocks that contain
// different forms of supported representation (HIR, LR, IR, bytecode; we only
// use HIR at this point).
//
// A (shortened) sample file may look like this:
//
// begin_compilation
// name "static void foo.bar()"
// method "static void foo.bar()"
// date 1576632329
// end_compilation
// begin_cfg
// name "Initial"
// begin_block
// name "B18446744073709551615"
// from_bci -1
// to_bci -1
// predecessors
// successors "B0"
// xhandlers
// flags
// begin_states
// begin_locals
// size 0
// method "none"
// end_locals
// end_states
// begin_HIR
// 0 0 info0 INFO data:static void foo.bar() <|@
// end_HIR
// end_block
// begin_block
// name "B0"
// from_bci -1
// to_bci -1
// predecessors
// successors
// xhandlers
// flags
// begin_states
// [...]
// end_states
// begin_HIR
// 0 0 i0 CONST [v0] literal:0 <|@
// [...]
// 0 0 i9 RETURN_VOID <|@
// end_HIR
// end_block
// end_cfg
// begin_cfg
// name "First pass"
// [...]
namespace visualizer {
using namespace cfg;
namespace {
// A "container" that helps with formatting list. Use as a helper instance, add
// elements to the stream given by next(), and create the final format with the
// supplied operator<<.
class List {
public:
explicit List() : m_empty(true) {}
std::ostream& next() {
if (m_empty) {
m_empty = false;
} else {
m_stream << ",";
}
return m_stream;
}
private:
bool m_empty;
std::ostringstream m_stream;
friend std::ostream& operator<<(std::ostream& os, const List& list);
};
std::ostream& operator<<(std::ostream& os, const List& list) {
os << '[' << list.m_stream.str() << ']';
return os;
}
// Base class for "tagged" element formatting. Knows how to format blocks (and
// provides an RAII wrapper) as well as some of the textual format (plain
// values that may be quoted, attributes that are key-value pairs).
class TaggedBase {
public:
explicit TaggedBase(std::ostream& o) : m_output(o), m_indent(0) {}
void start_tag(const char* name) {
indent();
m_output << "begin_" << name << std::endl;
m_indent++;
}
void end_tag(const char* name) {
m_indent--;
indent();
m_output << "end_" << name << std::endl;
}
struct TagRAII {
explicit TagRAII(TaggedBase* p, const char* tag) : p(p), tag(tag) {
p->start_tag(tag);
}
~TagRAII() { p->end_tag(tag); }
TaggedBase* p;
const char* tag;
};
void indent() {
for (size_t i = 0; i < m_indent; ++i) {
m_output << " ";
}
}
// QuotedEnum and ValueStream are helpers for printing (complex) values.
// ValueStream will take care of quotation, printing the right string on
// _destruction, so ensure that a ValueStream does not exist longer than
// necessary, i.e., best-practice is to never keep one explicitly.
enum QuotedEnum {
QUOTED,
NOT_QUOTED,
};
template <QuotedEnum quoted>
struct ValueStream final {
std::ostringstream oss;
std::ostream* trg = nullptr;
bool disposed = false;
explicit ValueStream(std::ostream* trg) : trg(trg) {}
// Only exists for resolution. Will fail when used.
ValueStream(const ValueStream&) : disposed(true) { not_reached(); }
ValueStream(ValueStream&& rhs) noexcept
: oss(std::move(rhs.oss)), trg(rhs.trg) {
rhs.disposed = true;
}
~ValueStream() {
if (disposed) {
return;
}
auto tmp = oss.str();
if (!tmp.empty()) {
bool is_quoted = quoted == QUOTED;
*trg << (is_quoted ? "\"" : "") << tmp << (is_quoted ? "\"" : "");
}
*trg << std::endl;
}
// See copy constructor. Assignment operators for lint.
ValueStream& operator=(const ValueStream&) { not_reached(); }
ValueStream& operator=(ValueStream&& rhs) noexcept {
oss = std::move(rhs.oss);
trg = rhs.trg;
rhs.disposed = true;
return *this;
}
template <typename T>
ValueStream& operator<<(const T& val) {
oss << val;
return *this;
}
};
template <QuotedEnum quoted = NOT_QUOTED>
ValueStream<quoted> value(const std::string& name) {
indent();
m_output << name << ' ';
return ValueStream<quoted>(&m_output);
}
std::ostream& attribute() {
m_output << " ";
return m_output;
}
std::ostream& attribute(const std::string& attr) {
m_output << " ";
always_assert_log(!attr.empty(), "Attribute must be non-empty");
always_assert_log(attr.find(' ') == std::string::npos,
"Attribute must not have spaces");
return m_output << attr << ":";
}
protected:
std::ostream& m_output;
size_t m_indent;
};
// Base printer that can format c1visualizer blocks filled with Redex elements
// as HIR. Generic to allow both ControlFlowGraph as well as IRList input in
// specialized implementations.
template <typename T>
class CodeVisualizer : public TaggedBase {
public:
explicit CodeVisualizer(std::ostream& output) : TaggedBase(output) {}
virtual ~CodeVisualizer() {}
static void dex_string(std::ostream& os, const DexString* s) {
os << (s ? s->str() : "<null>");
}
void instruction(IRInstruction* insn) {
m_output << SHOW(insn->opcode());
if (insn->srcs_size() || insn->has_dest()) {
List input_list;
if (insn->has_dest()) {
input_list.next() << "v" << insn->dest();
}
for (reg_t src : insn->srcs()) {
input_list.next() << "v" << src;
}
attribute() << input_list;
}
if (insn->has_method()) {
attribute("method_name") << show(insn->get_method());
}
if (insn->has_field()) {
attribute("field_name") << show(insn->get_field());
}
if (insn->has_type()) {
attribute("type") << show(insn->get_type());
}
if (insn->has_literal()) {
attribute("literal") << insn->get_literal();
}
if (insn->has_string()) {
dex_string(attribute("string"), insn->get_string());
}
}
void mie_position(const MethodItemEntry& mie) {
m_output << mie.type;
if (mie.pos) {
m_output << " \"";
DexPosition& pos = *mie.pos;
if (pos.method != nullptr) {
m_output << pos.method->str();
} else {
m_output << "<unnamed-method>";
}
m_output << "(";
if (pos.file != nullptr) {
m_output << pos.file->str() << ":" << pos.line;
} else {
m_output << "<no-file>";
}
m_output << ")\"";
}
}
void source_block(const SourceBlock& sb) {
m_output << MFLOW_SOURCE_BLOCK << " " << sb.show(/*quoted_src=*/true);
}
template <typename... Args>
void mie(const MethodItemEntry& mie, Args... args) {
switch (mie.type) {
case MFLOW_TRY:
case MFLOW_CATCH:
case MFLOW_DEX_OPCODE:
case MFLOW_TARGET:
case MFLOW_DEBUG:
case MFLOW_FALLTHROUGH:
m_output << mie.type;
return;
case MFLOW_POSITION:
static_cast<T*>(this)->mie_position(mie);
return;
case MFLOW_OPCODE:
static_cast<T*>(this)->instruction(mie.insn, args...);
return;
case MFLOW_SOURCE_BLOCK:
static_cast<T*>(this)->source_block(*mie.src_block);
return;
}
}
// bci (bytecode index) & num_uses are required by the format.
void mie_prefix(size_t bci, size_t num_uses) {
indent();
m_output << bci << " " << num_uses;
}
void mie_prefix(size_t bci, size_t num_uses, size_t insn_id) {
mie_prefix(bci, num_uses);
m_output << " i" << insn_id << " ";
}
void mie_suffix() { m_output << " <|@" << std::endl; }
template <typename C, typename... Other>
void mie_list(C* b, Other... other) {
for (auto& m : *b) {
// Using 0 for bci and num_uses as we don't have/need this.
mie_prefix(0, 0, static_cast<T*>(this)->mie_id(m));
mie(m, b, other...);
mie_suffix();
}
}
template <typename C, typename IdT, typename HIRFn>
void block(C* block, IdT id, bool exc, HIRFn hir) {
TagRAII block_tag(this, "block");
value<QUOTED>("name") << "B" << id;
value("from_bci") << -1;
value("to_bci") << -1;
static_cast<T*>(this)->predecessors(block);
static_cast<T*>(this)->successors(block);
static_cast<T*>(this)->exception_handlers(block);
value<QUOTED>("flags") << (exc ? "catch_block" : "");
{
// TODO: Is this really necessary? If so, fill in correctly.
TagRAII states_tag(this, "states");
TagRAII locals_tag(this, "locals");
value("size") << 0;
value<QUOTED>("method") << "none";
}
{
TagRAII hir_tag(this, "HIR");
hir(block);
}
}
};
using namespace boost::adaptors;
class CFGVisualizer : public CodeVisualizer<CFGVisualizer> {
public:
CFGVisualizer(ControlFlowGraph* cfg, std::ostream& output)
: CodeVisualizer(output), m_cfg(cfg) {
if (m_cfg) {
prepare();
}
}
virtual ~CFGVisualizer() {}
template <typename T>
void blocklist(const std::string& name, const T& blocks) {
indent();
m_output << name;
for (auto b : blocks) {
m_output << " \"B" << b->id() << "\" ";
}
m_output << std::endl;
}
void predecessors(Block* block) {
blocklist("predecessors",
transform(block->preds(), [](Edge* e) { return e->src(); }));
}
static bool is_throw_edge(const Edge* e) { return e->type() == EDGE_THROW; }
static bool is_not_throw_edge(const Edge* e) { return !is_throw_edge(e); }
template <typename T>
std::vector<Edge*> get_succ_edges(Block* block, T& fn) {
return m_cfg ? m_cfg->get_succ_edges_if(block, fn) : std::vector<Edge*>{};
}
void successors(Block* block) {
blocklist("successors",
transform(get_succ_edges(block, is_not_throw_edge),
[](Edge* e) { return e->target(); }));
}
void exception_handlers(Block* block) {
blocklist("xhandlers",
transform(get_succ_edges(block, is_throw_edge),
[](Edge* e) { return e->target(); }));
}
void instruction(IRInstruction* insn, Block* from) {
CodeVisualizer::instruction(insn);
if (opcode::is_a_conditional_branch(insn->opcode())) {
auto edge = m_cfg->get_succ_edge_if(
from, [](Edge* e) { return e->type() == EDGE_BRANCH; });
redex_assert(edge);
attribute("target") << "B" << edge->target()->id();
}
}
size_t mie_id(const MethodItemEntry& mie) const {
return m_mie_id_map.at(&mie);
}
void prefix_block(Block* b, const std::string& prefix) {
auto fake_insn = [&](Block*) {
mie_prefix(0, 0);
m_output << " info0 INFO";
attribute("data") << prefix;
mie_suffix();
};
CodeVisualizer::block(b, b->id(), false, fake_insn);
}
void prepare() {
size_t index = 0;
for (auto block : m_cfg->blocks()) {
for (auto& mie : *block) {
m_mie_id_map[&mie] = index;
if (mie.type == MFLOW_OPCODE) {
m_insn_id_map[mie.insn] = index;
}
++index;
}
for (auto edge : m_cfg->get_succ_edges_if(block, is_throw_edge)) {
m_exc_blocks.insert(edge->target());
}
if (m_cfg->get_pred_edge_if(block, is_throw_edge)) {
m_exc_blocks.insert(block);
}
}
}
void cfg(const std::string& name, const optional<std::string>& prefix) {
{
TagRAII cfg_tag(this, "cfg");
value<QUOTED>("name") << name;
if (prefix) {
Block fake_block(m_cfg, std::numeric_limits<BlockId>::max());
auto first_real = (!m_cfg || m_cfg->blocks().empty())
? nullptr
: *m_cfg->blocks().begin();
Edge fake_edge(&fake_block, first_real ? first_real : &fake_block,
EDGE_GOTO);
const_cast<std::vector<Edge*>&>(fake_block.succs())
.push_back(&fake_edge);
prefix_block(&fake_block, *prefix);
}
if (m_cfg) {
for (auto b : m_cfg->blocks()) {
CodeVisualizer::block(b, b->id(), m_exc_blocks.count(b),
[&](Block*) { mie_list(b); });
}
}
}
}
private:
ControlFlowGraph* m_cfg;
std::unordered_map<const MethodItemEntry*, size_t> m_mie_id_map;
std::unordered_map<const IRInstruction*, size_t> m_insn_id_map;
std::unordered_set<Block*> m_exc_blocks;
};
class IRCodeVisualizer : public CodeVisualizer<IRCodeVisualizer> {
public:
IRCodeVisualizer(IRCode* code, std::ostream& output)
: CodeVisualizer(output), m_code(code) {
if (m_code) {
prepare();
}
}
virtual ~IRCodeVisualizer() {}
void empty_blocklist(const std::string& name) {
indent();
m_output << name;
m_output << std::endl;
}
void blocklist(const std::string& name, size_t succ_id) {
indent();
m_output << name;
m_output << " \"B" << succ_id << "\" ";
m_output << std::endl;
}
void predecessors(IRCode*) { empty_blocklist("predecessors"); }
void successors(IRCode*) { empty_blocklist("successors"); }
void exception_handlers(IRCode*) { empty_blocklist("xhandlers"); }
void predecessors(const std::string*) { empty_blocklist("predecessors"); }
void successors(const std::string*) { blocklist("successors", 0); }
void exception_handlers(const std::string*) { empty_blocklist("xhandlers"); }
void instruction(IRInstruction* insn, IRCode*) {
CodeVisualizer::instruction(insn);
}
size_t mie_id(const MethodItemEntry& mie) const {
return m_mie_id_map.at(&mie);
}
void prefix_block(const std::string& prefix) {
auto fake_insn = [&](const std::string*) {
mie_prefix(0, 0);
m_output << " info0 INFO";
attribute("data") << prefix;
mie_suffix();
};
CodeVisualizer::block(&prefix, (size_t)(-1), false, fake_insn);
}
void prepare() {
size_t index = 0;
for (auto& mie : *m_code) {
m_mie_id_map[&mie] = index++;
}
}
void code(const std::string& name, const optional<std::string>& prefix) {
{
TagRAII cfg_tag(this, "cfg");
value<QUOTED>("name") << name;
if (prefix) {
prefix_block(*prefix);
}
if (m_code) {
CodeVisualizer::block(m_code, 0, false,
[&](IRCode*) { mie_list(m_code); });
}
}
}
private:
IRCode* m_code;
std::unordered_map<const MethodItemEntry*, size_t> m_mie_id_map;
};
} // namespace
void print_compilation_header(std::ostream& os,
const std::string& name,
const std::string& method) {
TaggedBase printer(os);
TaggedBase::TagRAII compilation_tag(&printer, "compilation");
printer.value<TaggedBase::QUOTED>("name") << name;
printer.value<TaggedBase::QUOTED>("method") << method;
printer.value("date") << time(nullptr);
}
void print_cfg(std::ostream& os,
ControlFlowGraph* cfg,
const std::string& name,
const optional<std::string>& prefix_block) {
CFGVisualizer visualizer(cfg, os);
visualizer.cfg(name, prefix_block);
}
void print_ircode(std::ostream& os,
IRCode* code,
const std::string& name,
const optional<std::string>& prefix_block) {
if (code && code->cfg_built()) {
print_cfg(os, &code->cfg(), name, prefix_block);
return;
}
IRCodeVisualizer visualizer(code, os);
visualizer.code(name, prefix_block);
}
namespace {
// A fake name to allow dedupe.
static constexpr const char* FAKE_PASS_NAME = "FAKE_PASS_NAME_FOR_DEDUPE";
std::vector<DexMethod*> get_all_methods(DexClass* klass) {
std::vector<DexMethod*> all_methods;
all_methods.insert(all_methods.end(), klass->get_dmethods().begin(),
klass->get_dmethods().end());
all_methods.insert(all_methods.end(), klass->get_vmethods().begin(),
klass->get_vmethods().end());
return all_methods;
}
} // namespace
// A stream storage for CFG visualization. On request, will not emit a pass if
// the CFG did not change.
MethodCFGStream::MethodCFGStream(DexMethod* m) : m_method(m) {
m_orig_name = vshow(m, false);
print_compilation_header(m_ss, m_orig_name, m_orig_name);
}
void MethodCFGStream::add_pass(const std::string& pass_name,
Options o,
const optional<std::string>& extra_prefix) {
auto cur_name = vshow(m_method, false);
auto code = m_method->get_code();
if (!code) {
cur_name += " (NO CODE)";
} else if (!(o & PRINT_CODE)) {
code = nullptr;
}
if (extra_prefix) {
cur_name = *extra_prefix + cur_name;
}
std::stringstream tmp;
if ((o & FORCE_CFG) && code) {
bool built_cfg = false;
if (code && !code->cfg_built()) {
built_cfg = true;
code->build_cfg();
}
print_cfg(tmp, &code->cfg(), FAKE_PASS_NAME, cur_name);
if (built_cfg) {
code->clear_cfg();
}
} else {
print_ircode(tmp, code, FAKE_PASS_NAME, cur_name);
}
std::string new_pass = tmp.str();
if (new_pass != m_last || !(o & SKIP_NO_CHANGE)) {
m_last = new_pass;
// Replace pass name.
auto pos = new_pass.find(FAKE_PASS_NAME);
redex_assert(pos != std::string::npos);
new_pass.replace(pos, strlen(FAKE_PASS_NAME), pass_name);
m_ss << new_pass;
}
}
ClassCFGStream::ClassCFGStream(DexClass* klass) : m_class(klass) {
for (auto* method : get_all_methods(klass)) {
m_methods.push_back(MethodState{method, MethodCFGStream(method), false});
}
}
void ClassCFGStream::add_pass(const std::string& pass_name, Options o) {
auto all_methods = get_all_methods(m_class);
for (auto& m : m_methods) {
auto it = std::find(all_methods.begin(), all_methods.end(), m.method);
if (it == all_methods.end()) {
m.removed = true;
} else {
all_methods.erase(it);
}
}
for (auto* method : all_methods) {
m_methods.push_back(MethodState{method, MethodCFGStream(method), false});
}
for (auto& m : m_methods) {
m.stream.add_pass(pass_name,
(Options)(o | (!m.removed ? Options::PRINT_CODE : 0)),
m.removed ? optional<std::string>("REMOVED ")
: boost::none);
}
}
void ClassCFGStream::write(std::ostream& os) const {
for (auto& m : m_methods) {
os << m.stream.get_output();
}
}
void Classes::add_all(const std::string& class_names) {
if (class_names.empty()) {
return;
}
std::vector<std::string> classes;
boost::algorithm::split(classes, class_names,
[](char c) { return c == ';'; });
for (const auto& c : classes) {
if (c.empty()) {
continue;
}
auto complete = c + ';';
if (!add(complete)) {
std::cerr << "Did not find class " << complete;
m_not_found.push_back(complete);
}
}
}
bool Classes::add(const std::string& class_name, bool add_initial_pass) {
auto type = DexType::make_type(class_name.c_str());
auto klass = type_class(type);
if (!klass) {
return false;
}
add(klass, add_initial_pass);
return true;
}
void Classes::add(DexClass* klass, bool add_initial_pass) {
m_class_cfgs.emplace_back(klass);
if (add_initial_pass) {
m_class_cfgs.back().add_pass("Initial");
}
}
void Classes::add_pass(const std::string& pass_name, Options o) {
add_pass([&pass_name]() { return pass_name; }, o);
}
void Classes::add_pass(const std::function<std::string()>& pass_name_lazy,
Options o) {
auto end =
std::remove_if(m_not_found.begin(), m_not_found.end(),
[&](const std::string& class_name) {
return add(class_name, /*add_initial_pass=*/false);
});
if (end != m_not_found.end()) {
m_not_found.erase(end, m_not_found.end());
}
if (m_class_cfgs.empty()) {
return;
}
std::string pass_name = pass_name_lazy();
for (auto& class_cfg : m_class_cfgs) {
class_cfg.add_pass(pass_name, o);
}
if (m_write_after_each_pass) {
write();
}
}
void Classes::write() const {
if (m_class_cfgs.empty()) {
return;
}
std::ofstream os(m_file_name);
for (const auto& c : m_class_cfgs) {
c.write(os);
}
}
} // namespace visualizer