source/Highlights.cpp (513 lines of code) (raw):

/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include <fstream> #include <DexUtil.h> #include <SpartaWorkQueue.h> #include <mariana-trench/Highlights.h> #include <mariana-trench/Log.h> #include <mariana-trench/Methods.h> #include <mariana-trench/Options.h> #include <mariana-trench/Positions.h> #include <mariana-trench/Registry.h> namespace marianatrench { namespace { using Bounds = Highlights::Bounds; using FileLines = Highlights::FileLines; enum class FrameType { Source, Sink }; /* * Given a line and column that is assumed not to be a whitespace's position, * returns a Bounds object describing the location of the next non-whitespace * character, if one exists before EOF. */ std::optional<Bounds> get_next_non_whitespace_position( const FileLines& lines, std::size_t current_line_number, std::size_t current_column) { current_column++; while (lines.has_line_number(current_line_number)) { const auto& line = lines.line(current_line_number); while (current_column < line.length()) { if (!std::isspace(line[current_column])) { return Bounds( {static_cast<int>(current_line_number), static_cast<int>(current_column), static_cast<int>(current_column)}); } current_column++; } current_column = 0; current_line_number++; } return std::nullopt; } Bounds remove_surrounding_whitespace(Bounds bounds, const std::string& line) { auto new_start = bounds.start; auto new_end = bounds.end; while (new_start < bounds.end && std::isspace(line[new_start])) { new_start++; } while (new_end > new_start && std::isspace(line[new_end])) { new_end--; } return {bounds.line, new_start, new_end}; } Bounds get_argument_bounds( ParameterPosition callee_parameter_position, ParameterPosition first_parameter_position, const FileLines& lines, const Bounds& callee_name_bounds) { auto current_parameter_position = first_parameter_position; std::size_t line_number = callee_name_bounds.line; auto current_line = lines.line(line_number); std::size_t arguments_start = callee_name_bounds.end + 2; if (arguments_start > current_line.length() - 1) { if (!lines.has_line_number(line_number + 1)) { return callee_name_bounds; } line_number++; current_line = lines.line(line_number); arguments_start = 0; } std::size_t end = current_line.length() - 1; int balanced_parentheses_counter = 1; while (lines.has_line_number(line_number)) { current_line = lines.line(line_number); end = current_line.length() - 1; for (auto i = arguments_start; i < current_line.length(); i++) { if (current_line[i] == '(') { balanced_parentheses_counter++; } else if (current_line[i] == ')') { balanced_parentheses_counter--; } else if (current_line[i] == ',' && balanced_parentheses_counter == 1) { if (current_parameter_position == callee_parameter_position) { end = i - 1; break; } current_parameter_position++; if (current_parameter_position == callee_parameter_position) { arguments_start = i + 1; } } if (balanced_parentheses_counter == 0) { end = i - 1; break; } } mt_assert(current_parameter_position <= callee_parameter_position); auto highlighted_portion = current_line.substr(arguments_start, end - arguments_start + 1); if (balanced_parentheses_counter == 0 || (callee_parameter_position == current_parameter_position && std::any_of( std::begin(highlighted_portion), std::end(highlighted_portion), [](unsigned char c) { return std::isalpha(c); }))) { break; } line_number++; arguments_start = 0; } // In either of these cases, we have failed to find the argument if (current_parameter_position < callee_parameter_position || !lines.has_line_number(line_number)) { return callee_name_bounds; } Bounds highlight_bounds = { static_cast<int>(line_number), static_cast<int>(arguments_start), static_cast<int>(end)}; return remove_surrounding_whitespace(highlight_bounds, current_line); } Bounds get_callee_this_parameter_bounds( const std::string& line, const Bounds& callee_name_bounds) { auto callee_start = callee_name_bounds.start; if (callee_start - 1 < 0 || line[callee_start - 1] != '.') { return callee_name_bounds; } auto start = callee_start - 1; while (start >= 1 && !std::isspace(line[start - 1])) { start--; } if (start != callee_start - 1) { return {callee_name_bounds.line, start, callee_start - 2}; } // If the current line doesn't contain the argument, just highlight the // callee so that we don't highlight a previous chained method call, etc return callee_name_bounds; } std::optional<std::string> get_class_name(const DexMethod* callee) { const auto& class_name = callee->get_class()->get_name()->str(); auto length = class_name.length(); if (length <= 2 || class_name[length - 1] != ';' || class_name[length - 2] == '/') { return std::nullopt; } auto i = class_name.rfind('/', /* start */ length - 2); if (i == std::string::npos) { return std::nullopt; } return class_name.substr(i + 1, length - i - 2); } Bounds get_iput_local_position_bounds( const FileLines& lines, const std::string& field_name, int line_number) { auto line = lines.line(line_number); auto field_start = line.find(field_name); if (field_start == std::string::npos) { return {line_number, 0, 0}; } Bounds field_name_bounds = { line_number, static_cast<int>(field_start), static_cast<int>(field_start + field_name.length() - 1)}; // The next non-whitespace character read must be an = sign auto next_character = get_next_non_whitespace_position( lines, line_number, field_name_bounds.end); if (!next_character || lines.line(next_character->line)[next_character->start] != '=') { return field_name_bounds; } auto assignee = get_next_non_whitespace_position( lines, next_character->line, next_character->start); if (!assignee) { return field_name_bounds; } line = lines.line(assignee->line); Bounds highlight_bounds = { static_cast<int>(assignee->line), static_cast<int>(assignee->start), static_cast<int>(line.length() - 1)}; return remove_surrounding_whitespace(highlight_bounds, line); } LocalPositionSet augment_local_positions( const LocalPositionSet& local_positions, const FileLines& lines, const Context& context) { if (local_positions.is_bottom() || local_positions.is_top()) { return local_positions; } auto new_local_positions = LocalPositionSet(); for (const auto* local_position : local_positions.elements()) { if (!local_position->path() || !local_position->instruction() || local_position->port() == std::nullopt) { new_local_positions.add(local_position); continue; } auto bounds = Highlights::get_local_position_bounds(*local_position, lines); new_local_positions.add(context.positions->get( local_position, bounds.line, bounds.start, bounds.end)); } return Highlights::filter_overlapping_highlights(new_local_positions); } const Position* augment_frame_position( const Method* callee, const AccessPath& callee_port, const Position* position, const FileLines& lines, const Context& context) { mt_assert(position != nullptr); mt_assert(callee != nullptr); auto bounds = Highlights::get_callee_highlight_bounds( callee->dex_method(), lines, position->line(), callee_port); return context.positions->get( position, bounds.line, bounds.start, bounds.end); } Taint augment_taint_positions( Taint taint, const FileLines& lines, const Context& context) { taint.update_non_leaf_positions( [&](const Method* callee, const AccessPath& callee_port, const Position* position) { return augment_frame_position( callee, callee_port, position, lines, context); }, [&](const LocalPositionSet& local_positions) { return augment_local_positions(local_positions, lines, context); }); return taint; } TaintAccessPathTree augment_taint_tree_positions( TaintAccessPathTree taint_tree, const FileLines& lines, const Context& context) { taint_tree.map([&](Taint& taint) { auto new_taint = augment_taint_positions(taint, lines, context); taint = std::move(new_taint); }); return taint_tree; } IssueSet augment_issue_positions( IssueSet issues, const FileLines& lines, const Context& context) { issues.map([&](Issue& issue) { Issue augmented_issue = Issue( augment_taint_positions(issue.sources(), lines, context), augment_taint_positions(issue.sinks(), lines, context), issue.rule(), issue.position()); issue = std::move(augmented_issue); }); return issues; } /** * Run a fixpoint to go through all the sources(or sinks) involved in issues * to add all the relevant files and methods to the mapping * `issue_files_to_methods`. */ void get_frames_files_to_methods( ConcurrentMap<const std::string*, std::unordered_set<const Method*>>& issue_files_to_methods, const ConcurrentSet<const Frame*>& frames, const Context& context, const Registry& registry, FrameType frame_type) { auto frames_to_check = std::make_unique<ConcurrentSet<const Frame*>>(frames); auto seen_frames = std::make_unique<ConcurrentSet<const Frame*>>(frames); while (frames_to_check->size() != 0) { auto new_frames_to_check = std::make_unique<ConcurrentSet<const Frame*>>(); auto queue = sparta::work_queue<const Frame*>([&](const Frame* frame) { const auto* callee = frame->callee(); if (!callee) { return; } auto callee_model = registry.get(callee); const auto& callee_port = frame->callee_port(); const auto* method_position = context.positions->get(callee); if (method_position && method_position->path()) { issue_files_to_methods.update( method_position->path(), [&](const std::string* /*filepath*/, std::unordered_set<const Method*>& methods, bool) { methods.emplace(callee); }); } Taint taint; if (frame_type == FrameType::Source) { taint = callee_model.generations().raw_read(callee_port).root(); } else if (frame_type == FrameType::Sink) { taint = callee_model.sinks().raw_read(callee_port).root(); } for (const auto& frame_set : taint) { for (const auto& callee_frame : frame_set) { if (callee_frame.is_leaf() || !seen_frames->emplace(&callee_frame)) { continue; } new_frames_to_check->emplace(&callee_frame); } } }); for (const auto& frame : *frames_to_check) { queue.add_item(frame); } queue.run_all(); frames_to_check = std::move(new_frames_to_check); } } /** * Returns a map of files involved in issues to the set of all the methods * defined in that file that are involved in issues. This way, when finding * highlights, we can open each file only once and only consider the * relevant methods in that file. */ ConcurrentMap<const std::string*, std::unordered_set<const Method*>> get_issue_files_to_methods(const Context& context, const Registry& registry) { ConcurrentMap<const std::string*, std::unordered_set<const Method*>> issue_files_to_methods; ConcurrentSet<const Frame*> sources; ConcurrentSet<const Frame*> sinks; auto queue = sparta::work_queue<const Method*>([&](const Method* method) { auto model = registry.get(method); if (model.issues().size() == 0) { return; } for (const auto& issue : model.issues()) { for (const auto& sink_frame_set : issue.sinks()) { for (const auto& sink : sink_frame_set) { if (!sink.is_leaf()) { sinks.emplace(&sink); } } } for (const auto& source_frame_set : issue.sources()) { for (const auto& source : source_frame_set) { if (!source.is_leaf()) { sources.emplace(&source); } } } } auto* method_position = context.positions->get(method); if (!method_position || !method_position->path()) { return; } issue_files_to_methods.update( method_position->path(), [&](const std::string* /*filepath*/, std::unordered_set<const Method*>& methods, bool) { methods.emplace(method); }); }); for (const auto* method : *context.methods) { queue.add_item(method); } queue.run_all(); get_frames_files_to_methods( issue_files_to_methods, sources, context, registry, FrameType::Source); get_frames_files_to_methods( issue_files_to_methods, sinks, context, registry, FrameType::Sink); return issue_files_to_methods; } } // namespace FileLines::FileLines(std::ifstream& stream) { std::string line; while (std::getline(stream, line)) { lines_.push_back(line); } } bool FileLines::has_line_number(std::size_t index) const { return index >= 1 && index <= lines_.size(); } const std::string& FileLines::line(std::size_t index) const { mt_assert(has_line_number(index)); return lines_[index - 1]; } std::size_t FileLines::size() const { return lines_.size(); } Bounds Highlights::get_local_position_bounds( const Position& local_position, const FileLines& lines) { auto line_number = local_position.line(); Bounds empty_bounds = {line_number, 0, 0}; if (!lines.has_line_number(line_number)) { WARNING( 3, "Trying to access line {} of a file with {} lines", line_number, lines.size()); return empty_bounds; } mt_assert(local_position.instruction() != nullptr); if (opcode::is_an_iput(local_position.instruction()->opcode())) { const auto& field_name = local_position.instruction()->get_field()->get_name()->str(); return get_iput_local_position_bounds(lines, field_name, line_number); } if (opcode::is_an_invoke(local_position.instruction()->opcode())) { const auto* callee_dex_method = local_position.instruction()->get_method(); if (!callee_dex_method->as_def()) { return empty_bounds; } mt_assert(local_position.port() != std::nullopt); return get_callee_highlight_bounds( callee_dex_method->as_def(), lines, local_position.line(), AccessPath(*local_position.port())); } return empty_bounds; } Bounds Highlights::get_callee_highlight_bounds( const DexMethod* callee, const FileLines& lines, int callee_line_number, const AccessPath& callee_port) { if (!lines.has_line_number(callee_line_number)) { WARNING( 3, "Trying to access line {} of a file with {} lines", callee_line_number, lines.size()); return {callee_line_number, 0, 0}; } auto line = lines.line(callee_line_number); auto callee_name = callee->get_name()->str(); if (method::is_init(callee)) { auto class_name = get_class_name(callee); if (!class_name) { return {callee_line_number, 0, 0}; } callee_name = "new " + *class_name; } auto callee_start = line.find(callee_name + "("); if (callee_start == std::string::npos) { return {callee_line_number, 0, 0}; } std::size_t callee_end = std::min(callee_start + callee_name.length() - 1, line.length() - 1); Bounds callee_name_bounds = { callee_line_number, static_cast<int>(callee_start), static_cast<int>(callee_end)}; if (!callee_port.root().is_argument() || (method::is_init(callee) && callee_port.root().parameter_position() == 0)) { return callee_name_bounds; } bool is_static = ::is_static(callee); if (callee_port.root().parameter_position() == 0 && !is_static) { return get_callee_this_parameter_bounds(line, callee_name_bounds); } return get_argument_bounds( callee_port.root().parameter_position(), is_static ? 0 : 1, lines, callee_name_bounds); } void Highlights::augment_positions(Registry& registry, const Context& context) { auto current_path = boost::filesystem::current_path(); boost::filesystem::current_path(context.options->source_root_directory()); auto issue_files_to_methods = get_issue_files_to_methods(context, registry); auto file_queue = sparta::work_queue<const std::string*>([&](const std::string* filepath) { std::ifstream stream(*filepath); if (stream.bad()) { WARNING(1, "File {} was not found.", *filepath); return; } auto lines = FileLines(stream); for (const auto* method : issue_files_to_methods.get(filepath, {})) { const auto old_model = registry.get(method); auto new_model = old_model; new_model.set_issues( augment_issue_positions(old_model.issues(), lines, context)); new_model.set_sinks( augment_taint_tree_positions(old_model.sinks(), lines, context)); new_model.set_generations(augment_taint_tree_positions( old_model.generations(), lines, context)); new_model.set_parameter_sources(augment_taint_tree_positions( old_model.parameter_sources(), lines, context)); registry.set(new_model); } }); for (const auto& [filepath, _] : issue_files_to_methods) { file_queue.add_item(filepath); } file_queue.run_all(); boost::filesystem::current_path(current_path); } LocalPositionSet Highlights::filter_overlapping_highlights( const LocalPositionSet& local_positions) { mt_assert(local_positions.is_value()); std::unordered_map<int, std::vector<const Position*>> grouped_by_line; for (const auto* local_position : local_positions.elements()) { auto line = local_position->line(); auto& same_line_positions = grouped_by_line[line]; if (same_line_positions.empty()) { same_line_positions.push_back(local_position); continue; } // No need to replace any existing highlights if the current one is empty if (local_position->end() <= 0) { continue; } auto current_start = local_position->start(); auto current_end = local_position->end(); std::vector<const Position*> new_positions; bool seen_shorter_overlapping_with_current = false; for (const auto* position : same_line_positions) { if (position->end() <= 0) { continue; } if (!position->overlaps(*local_position)) { new_positions.push_back(position); } else if ( current_end - current_start > position->end() - position->start()) { new_positions.push_back(position); seen_shorter_overlapping_with_current = true; } } if (!seen_shorter_overlapping_with_current) { new_positions.push_back(local_position); } same_line_positions = std::move(new_positions); } auto new_local_positions = LocalPositionSet(); for (const auto& [_, positions] : grouped_by_line) { for (const auto* position : positions) { new_local_positions.add(position); } } return new_local_positions; } } // namespace marianatrench