libredex/DexAssessments.cpp (391 lines of code) (raw):

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "DexAssessments.h"

#include <algorithm>
#include <atomic>
#include <cstdint>
#include <functional>
#include <ostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "DexClass.h"
#include "DexInstruction.h"
#include "DexPosition.h"
#include "DexUtil.h"
#include "IRCode.h"
#include "IROpcode.h"
#include "RedexContext.h"
#include "Show.h"
#include "Trace.h"
#include "Walkers.h"

// We define particular assessment implementations in their own namespaces.

namespace {

namespace dex_position {

// Per-method (and, once summed with operator+=, per-scope) counters describing
// the state of debug positions.
struct Assessment {
  // Positions
  uint64_t methods_without_positions{0};
  uint64_t methods_with_unknown_source_positions{0};
  uint64_t blocks_outside_try_without_positions{0};
  uint64_t blocks_inside_try_without_positions{0};
  uint64_t dangling_parent_positions{0};
  uint64_t parent_position_cycles{0};
  uint64_t outlined_method_invocation_without_pattern_position{0};
  uint64_t pattern_position_without_outlined_method_invocation{0};
  uint64_t switch_positions_outside_outlined_methods{0};
  uint64_t pattern_positions_inside_outlined_methods{0};
  uint64_t positions{0};
  uint64_t switch_positions{0};
  uint64_t pattern_positions{0};
  uint32_t max_parent_depth{0};

  // Returns true if any counter that indicates malformed position information
  // is non-zero. Purely informational counters (totals, max depth, methods
  // without positions / with unknown-source positions) do not count.
  bool has_problems() const {
    return blocks_outside_try_without_positions ||
           blocks_inside_try_without_positions || dangling_parent_positions ||
           // A parent chain that loops back on itself is malformed as well.
           parent_position_cycles ||
           outlined_method_invocation_without_pattern_position ||
           pattern_position_without_outlined_method_invocation ||
           switch_positions_outside_outlined_methods ||
           (!PositionPatternSwitchManager::
                CAN_OUTLINED_METHOD_INVOKE_OUTLINED_METHOD &&
            pattern_positions_inside_outlined_methods);
  }

  // Accumulates `other` into this assessment: all counters are summed, except
  // max_parent_depth, for which the maximum is kept.
  Assessment& operator+=(const Assessment& other) {
    methods_without_positions += other.methods_without_positions;
    methods_with_unknown_source_positions +=
        other.methods_with_unknown_source_positions;
    blocks_outside_try_without_positions +=
        other.blocks_outside_try_without_positions;
    blocks_inside_try_without_positions +=
        other.blocks_inside_try_without_positions;
    dangling_parent_positions += other.dangling_parent_positions;
    parent_position_cycles += other.parent_position_cycles;
    outlined_method_invocation_without_pattern_position +=
        other.outlined_method_invocation_without_pattern_position;
    pattern_position_without_outlined_method_invocation +=
        other.pattern_position_without_outlined_method_invocation;
    switch_positions_outside_outlined_methods +=
        other.switch_positions_outside_outlined_methods;
    pattern_positions_inside_outlined_methods +=
        other.pattern_positions_inside_outlined_methods;
    positions += other.positions;
    switch_positions += other.switch_positions;
    pattern_positions += other.pattern_positions;
    max_parent_depth = std::max(max_parent_depth, other.max_parent_depth);
    return *this;
  }

  // Exports every counter into a DexAssessment, keyed by the counter's name.
  assessments::DexAssessment to_dex_assessment() const {
    assessments::DexAssessment res;
    // Positions
    res["methods_without_positions"] = methods_without_positions;
    res["methods_with_unknown_source_positions"] =
        methods_with_unknown_source_positions;
    res["blocks_outside_try_without_positions"] =
        blocks_outside_try_without_positions;
    res["blocks_inside_try_without_positions"] =
        blocks_inside_try_without_positions;
    res["dangling_parent_positions"] = dangling_parent_positions;
    res["parent_position_cycles"] = parent_position_cycles;
    res["outlined_method_invocation_without_pattern_position"] =
        outlined_method_invocation_without_pattern_position;
    res["pattern_position_without_outlined_method_invocation"] =
        pattern_position_without_outlined_method_invocation;
    res["switch_positions_outside_outlined_methods"] =
        switch_positions_outside_outlined_methods;
    res["pattern_positions_inside_outlined_methods"] =
        pattern_positions_inside_outlined_methods;
    res["positions"] = positions;
    res["switch_positions"] = switch_positions;
    res["pattern_positions"] = pattern_positions;
    res["max_parent_depth"] = max_parent_depth;
    return res;
  }
};

// Whether an instruction with the given opcode is expected to be covered by a
// position. Only throwing opcodes qualify, minus a list of known exceptions.
bool needs_position(IROpcode opcode) {
  if (!opcode::can_throw(opcode)) {
    return false;
  }
  if (opcode == OPCODE_CONST_STRING) {
    // javac and/or the dexer seem to systematically ignore const-string.
    return false;
  }
  if (opcode == OPCODE_NEW_ARRAY || opcode::is_an_aput(opcode)) {
    // javac and/or the dexer seem to systematically ignore certain
    // array-related instructions.
    return false;
  }
  if (opcode == OPCODE_MONITOR_ENTER || opcode == OPCODE_MONITOR_EXIT ||
      opcode == OPCODE_CONST_CLASS) {
    // javac and/or the dexer seem not to provide positions for implicit
    // synchronization code of synchronized methods.
    return false;
  }
  if (opcode == OPCODE_INSTANCE_OF) {
    // inserted by VirtualMerging, and cannot actually throw
    return false;
  }
  return true;
}

// Computes a dex_position::Assessment for individual methods.
class Assessor {
 private:
  PositionPatternSwitchManager* m_manager;
  const DexString* m_unknown_source;

 public:
  Assessor()
      : m_manager(g_redex->get_position_pattern_switch_manager()),
        m_unknown_source(DexString::get_string("UnknownSource")) {}

  // Walks all blocks of `cfg` and counts position-related findings for
  // `method`. The caller in this file passes a non-editable cfg.
  Assessment analyze_method(DexMethod* method, cfg::ControlFlowGraph& cfg) {
    Assessment assessment;
    auto is_outlined_method = method->rstate.outlined();
    std::unordered_set<DexPosition*> positions;
    bool any_unknown_source_position = false;
    // We are working with a *non-editable* cfg here. A key difference between
    // an editable and an uneditable cfg is that the latter has not been
    // enriched with trailing positions in all blocks
    // (ControlFlowGraph::find_block_boundaries), while linearization (via
    // remove_duplicate_positions) removes redundant positions across block
    // boundaries. Thus, we keep track of last positions across blocks, just as
    // the cfg would when building an editable cfg (and just as symbolication
    // would when going backwards to find the position relevant to an
    // instruction offset).
    DexPosition* last_position = nullptr;
    for (auto block : cfg.blocks()) {
      // At most one "missing position" finding is recorded per block.
      bool block_without_position_reported = false;
      for (auto it = block->begin(); it != block->end(); ++it) {
        if (it->type == MFLOW_POSITION) {
          positions.insert(it->pos.get());
          last_position = it->pos.get();
          if (last_position->line == 0 &&
              last_position->file == m_unknown_source) {
            any_unknown_source_position = true;
          }
        } else if (it->type == MFLOW_OPCODE) {
          auto insn = it->insn;
          if (!last_position && !block_without_position_reported &&
              needs_position(insn->opcode())) {
            // A throw edge out of the block is what classifies it as being
            // "inside try" here.
            if (cfg.get_succ_edge_of_type(block, cfg::EdgeType::EDGE_THROW)) {
              assessment.blocks_inside_try_without_positions++;
            } else {
              assessment.blocks_outside_try_without_positions++;
            }
            block_without_position_reported = true;
          }
          if (opcode::is_invoke_static(insn->opcode()) &&
              insn->get_method()->is_def() &&
              insn->get_method()->as_def()->rstate.outlined()) {
            if (!last_position ||
                !m_manager->is_pattern_position(last_position)) {
              assessment.outlined_method_invocation_without_pattern_position++;
            }
          } else if (last_position &&
                     m_manager->is_pattern_position(last_position) &&
                     opcode::may_throw(insn->opcode())) {
            assessment.pattern_position_without_outlined_method_invocation++;
          }
        }
      }
    }

    // Memoized parent-chain depths. A negative entry marks a position whose
    // depth is still being computed; running into that marker again means the
    // parent chain loops back on itself. The mapped type must be signed, as
    // otherwise the marker could never compare less than zero and cycles
    // would go unnoticed.
    constexpr int64_t kInProgress = -1;
    std::unordered_map<DexPosition*, int64_t> parent_depths;
    std::function<uint32_t(DexPosition*)> get_parent_depth;
    get_parent_depth = [&](DexPosition* pos) -> uint32_t {
      if (pos == nullptr) {
        return 0;
      }
      auto it = parent_depths.find(pos);
      if (it != parent_depths.end()) {
        if (it->second < 0) {
          assessment.parent_position_cycles++;
          return 0;
        }
        return static_cast<uint32_t>(it->second);
      }
      if (!positions.count(pos)) {
        // The parent is not among the positions found in this method's cfg.
        assessment.dangling_parent_positions++;
        return 0;
      }
      parent_depths.emplace(pos, kInProgress);
      uint32_t depth = get_parent_depth(pos->parent) + 1;
      parent_depths[pos] = depth;
      assessment.max_parent_depth =
          std::max(assessment.max_parent_depth, depth);
      return depth;
    };

    for (auto pos : positions) {
      get_parent_depth(pos->parent);
      if (m_manager->is_pattern_position(pos)) {
        assessment.pattern_positions++;
        if (is_outlined_method) {
          assessment.pattern_positions_inside_outlined_methods++;
        }
      } else if (m_manager->is_switch_position(pos)) {
        assessment.switch_positions++;
        if (!is_outlined_method) {
          assessment.switch_positions_outside_outlined_methods++;
        }
      }
    }

    if (positions.empty()) {
      assessment.methods_without_positions++;
      // we forgive the missing block positions
      assessment.blocks_inside_try_without_positions = 0;
      assessment.blocks_outside_try_without_positions = 0;
    } else if (any_unknown_source_position) {
      assessment.methods_with_unknown_source_positions++;
      // we forgive the missing block positions
      assessment.blocks_inside_try_without_positions = 0;
      assessment.blocks_outside_try_without_positions = 0;
    }

    assessment.positions += positions.size();
    return assessment;
  }
};

} // namespace dex_position

} // namespace

namespace assessments {

// Returns the items of `assessment` sorted by key.
std::vector<DexAssessmentItem> order(const DexAssessment& assessment) {
  std::vector<DexAssessmentItem> res(assessment.begin(), assessment.end());
  std::sort(res.begin(), res.end(),
            [](const DexAssessmentItem& a, const DexAssessmentItem& b) {
              return a.first < b.first;
            });
  return res;
}

// Renders all items with a non-zero value as "key: value", in key order,
// separated by ", ".
std::string to_string(const DexAssessment& assessment) {
  std::ostringstream oss;
  bool first = true;
  for (const auto& p : order(assessment)) {
    if (p.second) {
      if (first) {
        first = false;
      } else {
        oss << ", ";
      }
      oss << p.first << ": " << p.second;
    }
  }
  return oss.str();
}

// Gathers class/field/method statistics and the position assessment over
// m_scope and returns them as one DexAssessment. Builds a non-editable cfg for
// any method with code that does not have a cfg yet, and asserts that no
// method has an editable cfg built.
DexAssessment DexScopeAssessor::run() {
  // This struct combines all individual assessment implementations.
  struct Assessment {
    dex_position::Assessment dex_position_assessment;

    Assessment& operator+=(const Assessment& other) {
      dex_position_assessment += other.dex_position_assessment;
      return *this;
    }

    bool has_problems() const {
      return dex_position_assessment.has_problems();
    }

    DexAssessment to_dex_assessment() const {
      return dex_position_assessment.to_dex_assessment();
    }
  };

  struct ClassStats {
    std::atomic<size_t> classes_without_deobfuscated_name{0};
    std::atomic<size_t> with_annotations{0};
    std::atomic<size_t> sum_annotations{0};
  };
  ClassStats class_stats{};
  walk::parallel::classes(m_scope, [&class_stats](DexClass* c) {
    if (c->get_deobfuscated_name_or_null() == nullptr) {
      class_stats.classes_without_deobfuscated_name.fetch_add(1);
    }
    auto* aset = c->get_anno_set();
    if (aset != nullptr && aset->size() > 0) {
      class_stats.with_annotations.fetch_add(1, std::memory_order_relaxed);
      class_stats.sum_annotations.fetch_add(aset->size(),
                                            std::memory_order_relaxed);
    }
  });

  struct FieldStats {
    std::atomic<size_t> fields_without_deobfuscated_name{0};
    std::atomic<size_t> num_fields{0};
    std::atomic<size_t> with_annotations{0};
    std::atomic<size_t> sum_annotations{0};
  };
  FieldStats field_stats{};
  walk::parallel::fields(m_scope, [&field_stats](DexField* f) {
    field_stats.num_fields.fetch_add(1, std::memory_order_relaxed);
    auto* aset = f->get_anno_set();
    if (aset != nullptr && aset->size() > 0) {
      field_stats.with_annotations.fetch_add(1, std::memory_order_relaxed);
      field_stats.sum_annotations.fetch_add(aset->size(),
                                            std::memory_order_relaxed);
    }
    if (f->get_deobfuscated_name().empty()) {
      field_stats.fields_without_deobfuscated_name.fetch_add(1);
    }
  });

  struct MethodStats {
    std::atomic<size_t> methods_without_deobfuscated_name{0};
    std::atomic<size_t> num_methods{0};
    std::atomic<size_t> methods_with_code{0};
    std::atomic<size_t> num_instructions{0};
    std::atomic<size_t> sum_opcodes{0};
    std::atomic<size_t> with_annotations{0};
    std::atomic<size_t> sum_annotations{0};
    std::atomic<size_t> with_param_annotations{0};
    std::atomic<size_t> sum_param_annotations{0};
  };
  MethodStats method_stats{};
  walk::parallel::methods(m_scope, [&method_stats](auto* m) {
    method_stats.num_methods.fetch_add(1, std::memory_order_relaxed);
    {
      auto* aset = m->get_anno_set();
      if (aset != nullptr && aset->size() > 0) {
        method_stats.with_annotations.fetch_add(1, std::memory_order_relaxed);
        method_stats.sum_annotations.fetch_add(aset->size(),
                                               std::memory_order_relaxed);
      }
    }
    {
      auto* panno = m->get_param_anno();
      if (panno != nullptr && !panno->empty()) {
        method_stats.with_param_annotations.fetch_add(
            1, std::memory_order_relaxed);
        method_stats.sum_param_annotations.fetch_add(panno->size(),
                                                     std::memory_order_relaxed);
      }
    }
    if (m->get_deobfuscated_name_or_null() == nullptr) {
      method_stats.methods_without_deobfuscated_name.fetch_add(1);
    }
    auto code = m->get_code();
    if (code == nullptr) {
      // The remaining counters only apply to methods that have code.
      return;
    }
    method_stats.methods_with_code.fetch_add(1, std::memory_order_relaxed);
    method_stats.num_instructions.fetch_add(code->count_opcodes(),
                                            std::memory_order_relaxed);
    method_stats.sum_opcodes.fetch_add(code->sum_opcode_sizes(),
                                       std::memory_order_relaxed);
  });

  dex_position::Assessor dex_position_assessor;
  auto combined_assessment = walk::parallel::methods<Assessment>(
      m_scope, [&dex_position_assessor](DexMethod* method) {
        Assessment assessment;
        auto code = method->get_code();
        if (!code) {
          return assessment;
        }
        always_assert(!code->editable_cfg_built());
        if (!code->cfg_built()) {
          code->build_cfg(/*editable*/ false);
        }
        assessment.dex_position_assessment =
            dex_position_assessor.analyze_method(method, code->cfg());
        if (traceEnabled(ASSESSOR, 2) && assessment.has_problems()) {
          // Level 3 additionally dumps the cfg of the offending method.
          if (traceEnabled(ASSESSOR, 3)) {
            TRACE(ASSESSOR, 3, "[scope assessor] %s: %s\n%s", SHOW(method),
                  to_string(assessment.to_dex_assessment()).c_str(),
                  SHOW(code->cfg()));
          } else {
            TRACE(ASSESSOR, 2, "[scope assessor] %s: %s", SHOW(method),
                  to_string(assessment.to_dex_assessment()).c_str());
          }
        }
        return assessment;
      });

  auto res = combined_assessment.to_dex_assessment();
  res["without_deobfuscated_names.methods"] =
      method_stats.methods_without_deobfuscated_name.load();
  res["without_deobfuscated_names.fields"] =
      field_stats.fields_without_deobfuscated_name.load();
  res["without_deobfuscated_names.classes"] =
      class_stats.classes_without_deobfuscated_name.load();
  res["num_classes"] = m_scope.size();
  res["num_methods"] = method_stats.num_methods.load();
  res["num_fields"] = field_stats.num_fields.load();
  res["methods~with~code"] = method_stats.methods_with_code.load();
  res["num_instructions"] = method_stats.num_instructions.load();
  res["sum_opcodes"] = method_stats.sum_opcodes.load();
  res["methods.with_annotations"] = method_stats.with_annotations.load();
  res["methods.sum_annotations"] = method_stats.sum_annotations.load();
  res["methods.with_param_annotations"] =
      method_stats.with_param_annotations.load();
  res["methods.sum_param_annotations"] =
      method_stats.sum_param_annotations.load();
  res["fields.with_annotations"] = field_stats.with_annotations.load();
  res["fields.sum_annotations"] = field_stats.sum_annotations.load();
  res["classes.with_annotations"] = class_stats.with_annotations.load();
  res["classes.sum_annotations"] = class_stats.sum_annotations.load();

  if (combined_assessment.has_problems()) {
    TRACE(ASSESSOR, 1, "[scope assessor] %s", to_string(res).c_str());
  }
  return res;
}

} // namespace assessments