libredex/SourceBlockConsistencyCheck.cpp (188 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "SourceBlockConsistencyCheck.h"
#include "ControlFlow.h"
#include "Debug.h"
#include "DexClass.h"
#include "IRCode.h"
#include "InteractiveDebugging.h"
#include "ScopedCFG.h"
#include "Show.h"
#include "SourceBlocks.h"
#include "Trace.h"
#include "Walkers.h"
#include <algorithm>
#include <iterator>
#include <map>
#include <set>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>
namespace source_blocks {
bool operator<(const SourceBlockInfo& l, const SourceBlockInfo& r) {
if (l.original_dex_method != r.original_dex_method) {
return compare_dexstrings(l.original_dex_method, r.original_dex_method);
}
return l.id < r.id;
}
bool operator==(const SourceBlockInfo& l, const SourceBlockInfo& r) {
return std::tie(l.original_dex_method, l.id) ==
std::tie(r.original_dex_method, r.id);
}
SourceBlockDomInfo::SourceBlockDomInfo(const cfg::ControlFlowGraph& cfg,
uint32_t num_src_blks)
: m_dom_tree(cfg, num_src_blks) {}
std::vector<SourceBlockInfo> SourceBlockDomInfo::get_removeable_src_blks() {
return std::vector<SourceBlockInfo>{m_dom_tree.leaves().begin(),
m_dom_tree.leaves().end()};
}
void SourceBlockDomInfo::remove_src_blk(const SourceBlockInfo& sb_info) {
m_dom_tree.remove_src_blk(sb_info);
}
void SourceBlockConsistencyCheck::initialize(const Scope& scope) {
always_assert(!this->is_initialized());
this->m_is_initialized = true;
// Preallocate slots in the map, so we can get
// at them when multithreaded without needing synchronization
walk::methods(scope, [this](DexMethod* dex_method) {
this->m_context_map.insert({dex_method, SBConsistencyContext{}});
});
walk::parallel::methods(scope, [this](DexMethod* dex_method) {
auto it = this->m_context_map.find(dex_method);
if (it == this->m_context_map.end()) {
return;
}
IRCode* code = dex_method->get_code();
if (code == nullptr) {
return;
}
cfg::ScopedCFG scfg(code);
cfg::ControlFlowGraph& cfg = code->cfg();
cfg.calculate_exit_block();
this->rebuild_sbdi(dex_method, cfg);
});
}
void SourceBlockConsistencyCheck::rebuild_sbdi(DexMethod* dex_method,
const ControlFlowGraph& cfg) {
auto it = this->m_context_map.find(dex_method);
if (it == this->m_context_map.end()) {
return;
}
auto& sbcc = it->second;
sbcc.m_source_blocks.clear();
for (auto* b : cfg.blocks()) {
source_blocks::foreach_source_block(b, [&sbcc](auto& sb) {
sbcc.m_source_blocks.insert(SourceBlockInfo{sb->src, sb->id});
});
}
sbcc.m_sbdi = SourceBlockDomInfo{
cfg, static_cast<uint32_t>(sbcc.m_source_blocks.size())};
}
template <class T>
struct merge_vecs {
void operator()(const T& addend, T* accumulator) const {
accumulator->insert(accumulator->end(), addend.begin(), addend.end());
}
};
size_t SourceBlockConsistencyCheck::run(const Scope& scope) {
struct Failure {
DexMethod* dex_method = nullptr;
std::vector<SourceBlockInfo> src_blks;
};
auto res = walk::parallel::methods<std::vector<Failure>,
merge_vecs<std::vector<Failure>>>(
scope, [this](DexMethod* dex_method) -> std::vector<Failure> {
auto it = this->m_context_map.find(dex_method);
if (it == this->m_context_map.end()) {
return {};
}
auto& sbcc = it->second;
auto& [source_blocks, known_missing_source_blocks, sbdi] = sbcc;
IRCode* code = dex_method->get_code();
if (code == nullptr) {
return {};
}
cfg::ScopedCFG scfg(code);
const cfg::ControlFlowGraph& cfg = code->cfg();
std::set<SourceBlockInfo> source_blocks_in_ir;
for (cfg::Block* block : cfg.blocks()) {
source_blocks::foreach_source_block(
block, [&source_blocks_in_ir](auto& sb) {
source_blocks_in_ir.insert({sb->src, sb->id});
});
}
std::vector<SourceBlockInfo> missing;
std::set_difference(source_blocks.begin(), source_blocks.end(),
source_blocks_in_ir.begin(),
source_blocks_in_ir.end(),
std::back_inserter(missing));
missing.erase(std::remove_if(
missing.begin(), missing.end(),
[&sbcc](const auto& s) {
return sbcc.m_known_missing_source_blocks.find(s) !=
sbcc.m_known_missing_source_blocks.end();
}),
missing.end());
if (missing.empty()) {
return {};
}
known_missing_source_blocks.insert(missing.begin(), missing.end());
std::sort(missing.begin(), missing.end());
for (bool changed = true; changed;) {
changed = false;
auto removeable_blks = sbdi.get_removeable_src_blks();
std::sort(removeable_blks.begin(), removeable_blks.end());
std::vector<SourceBlockInfo> blks_to_remove;
std::set_intersection(removeable_blks.begin(), removeable_blks.end(),
missing.begin(), missing.end(),
std::back_inserter(blks_to_remove));
if (!blks_to_remove.empty()) {
changed = true;
missing.erase(std::remove_if(missing.begin(), missing.end(),
[&blks_to_remove](auto& m) {
auto it_blks_to_remove =
std::lower_bound(
blks_to_remove.begin(),
blks_to_remove.end(), m);
return it_blks_to_remove !=
blks_to_remove.end() &&
*it_blks_to_remove == m;
}),
missing.end());
for (const auto& sb_info : blks_to_remove) {
sbdi.remove_src_blk(sb_info);
}
}
}
if (missing.empty()) {
return {};
}
return std::vector<Failure>{{dex_method, std::move(missing)}};
});
if (!res.empty()) {
int num_missing_blks = std::accumulate(
res.begin(), res.end(), 0,
[](const auto& l, const auto& r) { return l + r.src_blks.size(); });
TRACE(SBCC, 2,
"Pass introduced %d missing source blocks across %zu methods.",
num_missing_blks, res.size());
for (const auto& [dex_method, src_blks] : res) {
always_assert(!src_blks.empty());
std::map<const DexString*, std::vector<uint32_t>, dexstrings_comparator>
methodRefToSrcBlockIds;
for (const auto& [src, id] : src_blks) {
methodRefToSrcBlockIds[src].push_back(id);
}
TRACE(SBCC, 2, " Missing source blocks in method, %s",
show_deobfuscated(dex_method).c_str());
for (const auto& [src, ids] : methodRefToSrcBlockIds) {
std::string idListStr = std::accumulate(
ids.begin() + 1, ids.end(), std::to_string(ids.front()),
[](const auto& l, const auto& r) {
return l + ", " + std::to_string(r);
});
TRACE(SBCC, 2, " %s:\n %s", src->c_str(), idListStr.c_str());
}
}
return num_missing_blks;
}
return 0;
}
} // namespace source_blocks