source/Transfer.cpp
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <limits>
#include <fmt/format.h>
#include <mariana-trench/ArtificialMethods.h>
#include <mariana-trench/CallGraph.h>
#include <mariana-trench/ClassProperties.h>
#include <mariana-trench/Features.h>
#include <mariana-trench/Fields.h>
#include <mariana-trench/FulfilledPartialKindState.h>
#include <mariana-trench/Log.h>
#include <mariana-trench/Methods.h>
#include <mariana-trench/MultiSourceMultiSinkRule.h>
#include <mariana-trench/PartialKind.h>
#include <mariana-trench/Positions.h>
#include <mariana-trench/Rules.h>
#include <mariana-trench/Transfer.h>
#include <mariana-trench/TriggeredPartialKind.h>
namespace marianatrench {
namespace {
constexpr Register k_result_register = std::numeric_limits<Register>::max();
inline void log_instruction(
const MethodContext* context,
const IRInstruction* instruction) {
LOG_OR_DUMP(context, 4, "Instruction: \033[33m{}\033[0m", show(instruction));
}
} // namespace
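// Fallback for instructions without a dedicated handler: bind the destination
// (or result) register to a fresh, untainted memory location.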
bool Transfer::analyze_default(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
// Assign the result register to a new memory location.
auto* memory_location = context->memory_factory.make_location(instruction);
if (instruction->has_dest()) {
LOG_OR_DUMP(
context,
4,
"Setting register {} to {}",
instruction->dest(),
show(memory_location));
environment->assign(instruction->dest(), memory_location);
} else if (instruction->has_move_result_any()) {
LOG_OR_DUMP(
context, 4, "Setting result register to {}", show(memory_location));
environment->assign(k_result_register, memory_location);
} else {
return false;
}
LOG_OR_DUMP(context, 4, "Tainting {} with {{}}", show(memory_location));
environment->write(memory_location, TaintTree::bottom(), UpdateKind::Strong);
return false;
}
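// `check-cast` copies the operand's taint into a fresh memory location,
// adding a via-cast feature for the target type.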
bool Transfer::analyze_check_cast(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
mt_assert(instruction->srcs().size() == 1);
// Add via-cast feature
auto taint = environment->read(instruction->srcs()[0]);
auto features = FeatureMayAlwaysSet::make_always(
{context->features.get_via_cast_feature(instruction->get_type())});
taint.map(
[&features](Taint& sources) { sources.add_inferred_features(features); });
// Create a new memory location as we do not want to alias the pre-cast
// location when attaching the via-cast feature.
auto memory_location = context->memory_factory.make_location(instruction);
environment->write(memory_location, taint, UpdateKind::Strong);
LOG_OR_DUMP(
context,
4,
"Setting result register to new memory location {}",
show(memory_location));
environment->assign(k_result_register, memory_location);
return false;
}
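// Instance field read: the result register points to the memory location of
// the field, tainted with the sources of the field's model, if any.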
bool Transfer::analyze_iget(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
mt_assert(instruction->srcs().size() == 1);
mt_assert(instruction->has_field());
const auto* field =
context->call_graph.resolved_field_access(context->method(), instruction);
if (!field) {
WARNING_OR_DUMP(
context,
3,
"Unable to resolve access of instance field {}",
show(instruction->get_field()));
}
auto field_model = field ? context->registry.get(field) : FieldModel();
// Create a memory location that represents the field.
auto memory_locations = environment->memory_locations(
/* register */ instruction->srcs()[0],
/* field */ instruction->get_field()->get_name());
LOG_OR_DUMP(context, 4, "Setting result register to {}", memory_locations);
environment->assign(k_result_register, memory_locations);
if (!field_model.empty()) {
LOG_OR_DUMP(
context,
4,
"Tainting register {} with {}",
k_result_register,
field_model.sources());
environment->write(
k_result_register, Path({}), field_model.sources(), UpdateKind::Strong);
}
return false;
}
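// Static field read: the result register points to a fresh memory location,
// tainted with the sources of the field's model, if any.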
bool Transfer::analyze_sget(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
mt_assert(instruction->srcs().size() == 0);
mt_assert(instruction->has_field());
const auto* field =
context->call_graph.resolved_field_access(context->method(), instruction);
if (!field) {
WARNING_OR_DUMP(
context,
3,
"Unable to resolve access of static field {}",
show(instruction->get_field()));
}
auto field_model = field ? context->registry.get(field) : FieldModel();
auto memory_location = context->memory_factory.make_location(instruction);
LOG_OR_DUMP(context, 4, "Setting result register to {}", *memory_location);
environment->assign(k_result_register, memory_location);
if (!field_model.empty()) {
LOG_OR_DUMP(
context,
4,
"Tainting register {} with {}",
k_result_register,
field_model.sources());
environment->write(
k_result_register,
TaintTree(field_model.sources()),
UpdateKind::Strong);
}
return false;
}
namespace {
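// A callee resolved at a call site, along with the model to apply there.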
struct Callee {
const DexMethodRef* method_reference;
const Method* MT_NULLABLE resolved_base_method;
const Position* position;
Model model;
};
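// Return the inferred runtime type, if known, of each source register of the
// instruction.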
std::vector<const DexType* MT_NULLABLE> get_source_register_types(
const MethodContext* context,
const IRInstruction* instruction) {
std::vector<const DexType* MT_NULLABLE> register_types = {};
for (const auto& source_register : instruction->srcs_vec()) {
register_types.push_back(context->types.register_type(
context->method(), instruction, source_register));
}
return register_types;
}
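// Return the constant value, if known, held by each memory location of each
// source register.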
std::vector<std::optional<std::string>> get_source_constant_arguments(
AnalysisEnvironment* environment,
const IRInstruction* instruction) {
std::vector<std::optional<std::string>> constant_arguments = {};
for (const auto& register_id : instruction->srcs_vec()) {
auto memory_locations = environment->memory_locations(register_id);
for (auto* memory_location : memory_locations.elements()) {
std::optional<std::string> value;
if (const auto* instruction_memory_location =
memory_location->dyn_cast<InstructionMemoryLocation>();
instruction_memory_location != nullptr) {
value = instruction_memory_location->get_constant();
}
constant_arguments.push_back(value);
}
}
return constant_arguments;
}
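// Resolve the callee of an invoke instruction and fetch its model
// instantiated at this call site.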
Callee get_callee(
MethodContext* context,
AnalysisEnvironment* environment,
const IRInstruction* instruction) {
mt_assert(opcode::is_an_invoke(instruction->opcode()));
auto call_target = context->call_graph.callee(context->method(), instruction);
if (!call_target.resolved()) {
WARNING_OR_DUMP(
context,
3,
"Unable to resolve call to `{}`",
show(instruction->get_method()));
} else {
LOG_OR_DUMP(
context,
4,
"Call resolved to `{}`",
show(call_target.resolved_base_callee()));
}
auto* position =
context->positions.get(context->method(), environment->last_position());
auto model = context->model_at_callsite(
call_target,
position,
get_source_register_types(context, instruction),
get_source_constant_arguments(environment, instruction));
LOG_OR_DUMP(context, 4, "Callee model: {}", model);
// Avoid copies using `std::move`.
// https://fb.workplace.com/groups/2292641227666517/permalink/2478196942444277/
return Callee{
instruction->get_method(),
call_target.resolved_base_callee(),
position,
std::move(model)};
}
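// Variant of `get_callee` for an artificial callee, i.e. a call recorded in
// the call graph that does not correspond to an invoke instruction.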
Callee get_callee(
MethodContext* context,
AnalysisEnvironment* environment,
const ArtificialCallee& callee) {
const auto* resolved_base_callee = callee.call_target.resolved_base_callee();
mt_assert(resolved_base_callee != nullptr);
LOG_OR_DUMP(
context, 4, "Artificial call to `{}`", show(resolved_base_callee));
auto* position =
context->positions.get(context->method(), environment->last_position());
auto model = context->model_at_callsite(
callee.call_target,
position,
/* source_register_types */ {},
/* source_constant_arguments */ {});
LOG_OR_DUMP(context, 4, "Callee model: {}", model);
return Callee{
resolved_base_callee->dex_method(),
resolved_base_callee,
position,
std::move(model)};
}
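// Apply the callee's generations, tainting the invoke result or the argument
// registers with the sources generated by the callee.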
void apply_generations(
MethodContext* context,
AnalysisEnvironment* environment,
const IRInstruction* instruction,
const Callee& callee,
TaintTree& result_taint) {
const auto& instruction_sources = instruction->srcs_vec();
LOG_OR_DUMP(
context,
4,
"Processing generations for call to `{}`",
show(callee.method_reference));
for (const auto& [root, generations] : callee.model.generations()) {
switch (root.kind()) {
case Root::Kind::Return: {
LOG_OR_DUMP(context, 4, "Tainting invoke result with {}", generations);
result_taint.join_with(generations);
break;
}
case Root::Kind::Argument: {
auto parameter_position = root.parameter_position();
auto register_id = instruction_sources.at(parameter_position);
LOG_OR_DUMP(
context,
4,
"Tainting register {} with {}",
register_id,
generations);
environment->write(register_id, generations, UpdateKind::Weak);
break;
}
default:
mt_unreachable();
}
}
}
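// Apply the callee's propagations, moving taint from input arguments to the
// invoke result or to output arguments. Also infers add-features-to-arguments
// on caller ports and applies via-obscure features when the model requires.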
void apply_propagations(
MethodContext* context,
const AnalysisEnvironment* previous_environment,
AnalysisEnvironment* new_environment,
const IRInstruction* instruction,
const Callee& callee,
TaintTree& result_taint) {
const auto& instruction_sources = instruction->srcs_vec();
LOG_OR_DUMP(
context,
4,
"Processing propagations for call to `{}`",
show(callee.method_reference));
for (const auto& [output, propagations] :
callee.model.propagations().elements()) {
auto output_features = FeatureMayAlwaysSet::make_always(
callee.model.add_features_to_arguments(output.root()));
for (const auto& propagation : propagations) {
LOG_OR_DUMP(
context, 4, "Processing propagation {} to {}", propagation, output);
const auto& input = propagation.input().root();
if (!input.is_argument()) {
WARNING_OR_DUMP(
context, 2, "Ignoring propagation with a return input: {}", input);
continue;
}
auto input_parameter_position = input.parameter_position();
if (input_parameter_position >= instruction_sources.size()) {
WARNING(
2,
"Model for method `{}` contains a port on parameter {} but the method only has {} parameters. Skipping...",
show(callee.method_reference),
input_parameter_position,
instruction_sources.size());
continue;
}
auto input_register_id = instruction_sources.at(input_parameter_position);
auto taint_tree = previous_environment->read(
input_register_id, propagation.input().path());
// Collapsing the tree here is required for correctness and performance.
// Propagations can be collapsed, which results in taking the common
// prefix of the input paths. Because of this, if we don't collapse here,
// we might build invalid trees. See the end-to-end test
// `propagation_collapse` for an example.
// However, collapsing leads to false positives with the builder pattern,
// e.g.:
// class A {
// private String s1;
//
// public A setS1(String s) {
// this.s1 = s;
// return this;
// }
// }
// In this case, collapsing propagations results in the entire `this` being
// tainted, which can lead to false positives on chained calls.
// `no-collapse-on-propagation` mode is used to prevent such cases.
// See the end-to-end test `no_collapse_on_propagation` for example.
if (!callee.model.no_collapse_on_propagation()) {
LOG_OR_DUMP(context, 4, "Collapsing taint tree {}", taint_tree);
taint_tree.collapse_inplace();
}
if (taint_tree.is_bottom()) {
continue;
}
FeatureMayAlwaysSet features = output_features;
features.add(propagation.features());
features.add_always(callee.model.add_features_to_arguments(input));
auto position =
context->positions.get(callee.position, input, instruction);
taint_tree.map([&features, position](Taint& taints) {
taints.add_inferred_features_and_local_position(features, position);
});
switch (output.root().kind()) {
case Root::Kind::Return: {
LOG_OR_DUMP(
context,
4,
"Tainting invoke result path {} with {}",
output.path(),
taint_tree);
result_taint.write(
output.path(), std::move(taint_tree), UpdateKind::Weak);
break;
}
case Root::Kind::Argument: {
auto output_parameter_position = output.root().parameter_position();
auto output_register_id =
instruction_sources.at(output_parameter_position);
LOG_OR_DUMP(
context,
4,
"Tainting register {} path {} with {}",
output_register_id,
output.path(),
taint_tree);
new_environment->write(
output_register_id,
output.path(),
std::move(taint_tree),
UpdateKind::Weak);
break;
}
default:
mt_unreachable();
}
}
}
if (callee.model.add_via_obscure_feature() ||
callee.model.has_add_features_to_arguments()) {
for (std::size_t parameter_position = 0;
parameter_position < instruction_sources.size();
parameter_position++) {
auto parameter = Root(Root::Kind::Argument, parameter_position);
auto features = FeatureMayAlwaysSet::make_always(
callee.model.add_features_to_arguments(parameter));
auto register_id = instruction_sources[parameter_position];
auto memory_locations =
previous_environment->memory_locations(register_id);
// Check whether an argument of the caller is passed into a callee port with
// add_features_to_arguments on it. If so, infer an add_features_to_arguments
// on the caller argument port.
if (!features.empty() && memory_locations.is_value() &&
memory_locations.size() == 1) {
auto* memory_location = *memory_locations.elements().begin();
auto access_path = memory_location->access_path();
if (access_path) {
context->model.add_add_features_to_arguments(
access_path->root(), features.always());
}
}
const auto* position = !features.empty()
? context->positions.get(callee.position, parameter, instruction)
: nullptr;
if (callee.model.add_via_obscure_feature()) {
features.add_always(context->features.get("via-obscure"));
}
if (features.empty()) {
continue;
}
for (auto* memory_location : memory_locations.elements()) {
auto taint = new_environment->read(memory_location);
taint.map([&features, position](Taint& sources) {
sources.add_inferred_features_and_local_position(features, position);
});
new_environment->write(
memory_location, std::move(taint), UpdateKind::Strong);
}
}
}
}
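// Create an issue for the given source and sink flow fulfilling `rule`.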
void create_issue(
MethodContext* context,
Taint source,
Taint sink,
const Rule* rule,
const Position* position,
const FeatureMayAlwaysSet& extra_features) {
source.add_inferred_features(
context->class_properties.issue_features(context->method()));
sink.add_inferred_features(extra_features);
auto issue =
Issue(Taint{std::move(source)}, Taint{std::move(sink)}, rule, position);
LOG_OR_DUMP(context, 4, "Found issue: {}", issue);
context->model.add_issue(std::move(issue));
}
// Called when a source is detected to be flowing into a partial sink for a
// multi-source rule. The set of fulfilled sinks should be accumulated for
// each argument at a callsite (an invoke operation).
void check_multi_source_multi_sink_rules(
MethodContext* context,
const Kind* source_kind,
const Taint& source,
const Kind* sink_kind,
const Taint& sink,
FulfilledPartialKindState& fulfilled_partial_sinks,
const MultiSourceMultiSinkRule* rule,
const Position* position,
const FeatureMayAlwaysSet& extra_features) {
const auto* partial_sink = sink_kind->as<PartialKind>();
mt_assert(partial_sink != nullptr);
// Features found by this branch of the multi-source-sink flow. Should be
// reported as part of the final issue discovered.
auto features = source.features_joined();
features.add(sink.features_joined());
auto issue_sink_frame = fulfilled_partial_sinks.fulfill_kind(
partial_sink, rule, features, context, sink);
if (issue_sink_frame) {
create_issue(
context, source, *issue_sink_frame, rule, position, extra_features);
} else {
LOG_OR_DUMP(
context,
4,
"Found source kind: {} flowing into partial sink: {}, rule code: {}",
*source_kind,
*partial_sink,
rule->code());
}
}
FeatureMayAlwaysSet get_fulfilled_sink_features(
const FulfilledPartialKindState& fulfilled_partial_sinks,
const Kind* transformed_sink_kind) {
const auto* new_kind = transformed_sink_kind->as<TriggeredPartialKind>();
// Called only after transform_kind_with_features creates a triggered kind,
// so this must be a TriggeredPartialKind.
mt_assert(new_kind != nullptr);
const auto* rule = new_kind->rule();
const auto* counterpart = fulfilled_partial_sinks.get_fulfilled_counterpart(
/* unfulfilled_kind */ new_kind->partial_kind(), rule);
// A triggered kind was created, so its counterpart must exist.
mt_assert(counterpart != nullptr);
return fulfilled_partial_sinks.get_features(counterpart, rule);
}
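// Infer sinks for the caller: when an artificial source (which stands for a
// parameter of the caller) flows into a sink, the sink is attached to the
// corresponding caller port. Fulfilled partial sinks are replaced by their
// triggered counterparts.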
void create_sinks(
MethodContext* context,
const Taint& sources,
const Taint& sinks,
const FeatureMayAlwaysSet& extra_features = {},
const FulfilledPartialKindState& fulfilled_partial_sinks = {}) {
if (sources.is_bottom() || sinks.is_bottom()) {
return;
}
auto partitioned_by_artificial_sources = sources.partition_by_kind<bool>(
[&](const Kind* kind) { return kind == Kinds::artificial_source(); });
auto artificial_sources = partitioned_by_artificial_sources.find(true);
if (artificial_sources == partitioned_by_artificial_sources.end()) {
// Sinks are created when artificial sources are found flowing into them.
// No artificial sources, therefore no sinks.
return;
}
for (const auto& source : artificial_sources->second) {
for (const auto& artificial_source : source) {
auto features = extra_features;
features.add_always(context->model.attach_to_sinks(
artificial_source.callee_port().root()));
features.add(artificial_source.features());
auto new_sinks = sinks.transform_kind_with_features(
[context, &fulfilled_partial_sinks](
const Kind* sink_kind) -> std::vector<const Kind*> {
const auto* partial_sink = sink_kind->as<PartialKind>();
if (!partial_sink) {
// No transformation. Keep sink as it is.
return {sink_kind};
}
return fulfilled_partial_sinks.make_triggered_counterparts(
context, /* unfulfilled_kind */ partial_sink);
},
[&fulfilled_partial_sinks](const Kind* new_kind) {
return get_fulfilled_sink_features(
fulfilled_partial_sinks, new_kind);
});
new_sinks.add_inferred_features(features);
new_sinks.set_local_positions(source.local_positions());
LOG_OR_DUMP(
context,
4,
"Inferred sink for port {}: {}",
artificial_source.callee_port(),
new_sinks);
context->model.add_inferred_sinks(
artificial_source.callee_port(), std::move(new_sinks));
}
}
}
// Checks if the given sources/sinks fulfill any rule. If so, create an issue.
//
// If fulfilled_partial_sinks is non-null, also checks for multi-source rules
// (partial rules). If a partial rule is fulfilled, this converts the partial
// sink into a triggered sink and accumulates it in the list of triggered
// sinks. How these sinks should be handled depends on what happens at other
// sinks/ports within the same callsite/invoke. The caller MUST accumulate
// triggered sinks at the callsite and then call create_sinks. Regular sinks
// are not created in this mode.
//
// If fulfilled_partial_sinks is null, regular sinks will be created if an
// artificial source is found to be flowing into a sink.
void check_flows(
MethodContext* context,
const Taint& sources,
const Taint& sinks,
const Position* position,
const FeatureMayAlwaysSet& extra_features,
FulfilledPartialKindState* MT_NULLABLE fulfilled_partial_sinks) {
if (sources.is_bottom() || sinks.is_bottom()) {
return;
}
auto sources_by_kind = sources.partition_by_kind();
auto sinks_by_kind = sinks.partition_by_kind();
for (const auto& [source_kind, source_taint] : sources_by_kind) {
if (source_kind == Kinds::artificial_source()) {
continue;
}
for (const auto& [sink_kind, sink_taint] : sinks_by_kind) {
// Check if this satisfies any rule. If so, create the issue.
const auto& rules = context->rules.rules(source_kind, sink_kind);
for (const auto* rule : rules) {
create_issue(
context, source_taint, sink_taint, rule, position, extra_features);
}
// Check if this satisfies any partial (multi-source/sink) rule.
if (fulfilled_partial_sinks) {
const auto* MT_NULLABLE partial_sink = sink_kind->as<PartialKind>();
if (partial_sink) {
const auto& partial_rules =
context->rules.partial_rules(source_kind, partial_sink);
for (const auto* partial_rule : partial_rules) {
check_multi_source_multi_sink_rules(
context,
source_kind,
source_taint,
sink_kind,
sink_taint,
*fulfilled_partial_sinks,
partial_rule,
position,
extra_features);
}
}
}
}
}
if (!fulfilled_partial_sinks) {
create_sinks(context, sources, sinks, extra_features);
}
}
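// Check flows from the registers passed at a call site into the sink ports of
// the callee's model, fulfilling partial sinks across all ports before
// creating sinks.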
void check_flows(
MethodContext* context,
const AnalysisEnvironment* environment,
const std::vector<Register>& instruction_sources,
const Callee& callee,
const FeatureMayAlwaysSet& extra_features = {}) {
LOG_OR_DUMP(
context,
4,
"Processing sinks for call to `{}`",
show(callee.method_reference));
FulfilledPartialKindState fulfilled_partial_sinks;
std::vector<std::tuple<AccessPath, Taint, const Taint&>> port_sources_sinks;
for (const auto& [port, sinks] : callee.model.sinks().elements()) {
if (!port.root().is_argument()) {
continue;
}
auto parameter_position = port.root().parameter_position();
if (parameter_position >= instruction_sources.size()) {
continue;
}
auto register_id = instruction_sources.at(parameter_position);
Taint sources = environment->read(register_id, port.path()).collapse();
check_flows(
context,
sources,
sinks,
callee.position,
extra_features,
&fulfilled_partial_sinks);
port_sources_sinks.push_back(
std::make_tuple(port, std::move(sources), std::cref(sinks)));
}
// Create the sinks, checking at each point whether any partial sinks should
// become triggered. This must not happen in the loop above because we need
// the full set of triggered sinks at all positions/ports of the callsite.
//
// Example: callsite(partial_sink_A, triggered_sink_B).
// Scenario: triggered_sink_B discovered in check_flows above when a source
// flows into the argument.
//
// This next loop needs that information to convert partial_sink_A into a
// triggered sink to be propagated if it is reachable via artificial sources.
//
// Outside of multi-source rules, this also creates regular sinks for the
// method if an artificial source is found flowing into a sink.
for (const auto& [port, sources, sinks] : port_sources_sinks) {
create_sinks(
context, sources, sinks, extra_features, fulfilled_partial_sinks);
}
}
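// Check flows from each source register into the synthetic array allocation
// sink.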
void check_flows_to_array_allocation(
MethodContext* context,
AnalysisEnvironment* environment,
const IRInstruction* instruction) {
auto* array_allocation_method = context->methods.get(
context->artificial_methods.array_allocation_method());
auto* position =
context->positions.get(context->method(), environment->last_position());
auto array_allocation_sink = Taint{Frame(
/* kind */ context->artificial_methods.array_allocation_kind(),
/* callee_port */ AccessPath(Root(Root::Kind::Argument, 0)),
/* callee */ array_allocation_method,
/* field_callee */ nullptr,
/* call_position */ position,
/* distance */ 1,
/* origins */ MethodSet{array_allocation_method},
/* field_origins */ {},
/* inferred features */ {},
/* locally_inferred_features */ {},
/* user features */ {},
/* via_type_of_ports */ {},
/* via_value_of_ports */ {},
/* local_positions */ {},
/* canonical_names */ {})};
auto instruction_sources = instruction->srcs_vec();
for (std::size_t parameter_position = 0;
parameter_position < instruction_sources.size();
parameter_position++) {
auto register_id = instruction_sources.at(parameter_position);
Taint sources = environment->read(register_id).collapse();
// Fulfilled partial sinks ignored. No partial sinks for array allocation.
check_flows(
context,
sources,
array_allocation_sink,
position,
/* extra_features */ {},
/* fulfilled_partial_sinks */ nullptr);
}
}
void check_flows(
MethodContext* context,
const AnalysisEnvironment* environment,
const IRInstruction* instruction,
const Callee& callee) {
check_flows(context, environment, instruction->srcs_vec(), callee);
}
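// Check flows for the artificial callees attached to this instruction, if
// any.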
void analyze_artificial_calls(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
const auto& artificial_callees =
context->call_graph.artificial_callees(context->method(), instruction);
for (const auto& artificial_callee : artificial_callees) {
check_flows(
context,
environment,
artificial_callee.register_parameters,
get_callee(context, environment, artificial_callee),
FeatureMayAlwaysSet::make_always(artificial_callee.features));
}
}
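// If the callee is modelled to alias a memory location on invoke, return the
// single memory location of its receiver; otherwise return nullptr.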
MemoryLocation* MT_NULLABLE try_alias_this_location(
MethodContext* context,
AnalysisEnvironment* environment,
const Callee& callee,
const IRInstruction* instruction) {
if (!callee.model.alias_memory_location_on_invoke()) {
return nullptr;
}
if (callee.resolved_base_method && callee.resolved_base_method->is_static()) {
return nullptr;
}
auto register_id = instruction->srcs_vec().at(0);
auto memory_locations = environment->memory_locations(register_id);
if (!memory_locations.is_value() || memory_locations.size() != 1) {
return nullptr;
}
auto* memory_location = *memory_locations.elements().begin();
LOG_OR_DUMP(
context,
4,
"Method invoke aliasing existing memory location {}",
show(memory_location));
return memory_location;
}
// If the method invoke can be safely inlined, return the result memory
// location; otherwise return nullptr.
MemoryLocation* MT_NULLABLE try_inline_invoke(
MethodContext* context,
const AnalysisEnvironment* environment,
const IRInstruction* instruction,
const Callee& callee) {
auto access_path = callee.model.inline_as().get_constant();
if (!access_path) {
return nullptr;
}
auto register_id = instruction->src(access_path->root().parameter_position());
auto memory_locations = environment->memory_locations(register_id);
if (!memory_locations.is_value() || memory_locations.size() != 1) {
return nullptr;
}
auto memory_location = *memory_locations.elements().begin();
for (const auto* field : access_path->path()) {
memory_location = memory_location->make_field(field);
}
// Only inline if the model does not generate or propagate extra taint.
if (!callee.model.generations().is_bottom() ||
!callee.model.propagations().leq(PropagationAccessPathTree({
{AccessPath(Root(Root::Kind::Return)),
PropagationSet{Propagation(
/* input */ *access_path,
/* inferred_features */ FeatureMayAlwaysSet(),
/* user_features */ FeatureSet::bottom())}},
})) ||
callee.model.add_via_obscure_feature() ||
callee.model.has_add_features_to_arguments()) {
return nullptr;
}
LOG_OR_DUMP(context, 4, "Inlining method call");
return memory_location;
}
} // namespace
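// Transfer function for invoke instructions: check flows into the callee's
// sinks, apply its propagations and generations, then bind the result
// register to the appropriate memory location.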
bool Transfer::analyze_invoke(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
auto callee = get_callee(context, environment, instruction);
const AnalysisEnvironment previous_environment = *environment;
TaintTree result_taint;
check_flows(context, &previous_environment, instruction, callee);
apply_propagations(
context,
&previous_environment,
environment,
instruction,
callee,
result_taint);
apply_generations(context, environment, instruction, callee, result_taint);
if (callee.resolved_base_method &&
callee.resolved_base_method->returns_void()) {
LOG_OR_DUMP(context, 4, "Resetting the result register");
environment->assign(k_result_register, MemoryLocationsDomain::bottom());
} else if (
auto* memory_location =
try_inline_invoke(context, environment, instruction, callee)) {
LOG_OR_DUMP(
context, 4, "Setting result register to {}", show(memory_location));
environment->assign(k_result_register, memory_location);
} else {
// Check if the method can alias existing memory location
memory_location =
try_alias_this_location(context, environment, callee, instruction);
// Assume the method call returns a new memory location
// that does not alias with anything.
if (memory_location == nullptr) {
memory_location = context->memory_factory.make_location(instruction);
}
LOG_OR_DUMP(
context, 4, "Setting result register to {}", show(memory_location));
environment->assign(k_result_register, memory_location);
LOG_OR_DUMP(
context, 4, "Tainting {} with {}", show(memory_location), result_taint);
environment->write(memory_location, result_taint, UpdateKind::Weak);
}
analyze_artificial_calls(context, instruction, environment);
return false;
}
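// Whether the location is the implicit `this$0` field, i.e. the reference to
// the enclosing instance of an inner class.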
bool is_inner_class_this(const FieldMemoryLocation* location) {
return location->parent()->is<ThisParameterMemoryLocation>() &&
location->field()->str() == "this$0";
}
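// Attach the `via-inner-class-this` feature to writes through `this$0`.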
void add_field_features(
MethodContext* context,
AbstractTreeDomain<Taint>& taint,
const FieldMemoryLocation* field_memory_location) {
if (!is_inner_class_this(field_memory_location)) {
return;
}
auto features = FeatureMayAlwaysSet::make_always(
{context->features.get("via-inner-class-this")});
taint.map(
[&features](Taint& sources) { sources.add_inferred_features(features); });
}
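// Instance field write: check flows into field sinks, then taint the memory
// location(s) representing the field.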
bool Transfer::analyze_iput(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
mt_assert(instruction->srcs().size() == 2);
mt_assert(instruction->has_field());
auto taint = environment->read(/* register */ instruction->srcs()[0]);
auto* position = context->positions.get(
context->method(),
environment->last_position(),
Root(Root::Kind::Return),
instruction);
taint.map(
[position](Taint& sources) { sources.add_local_position(position); });
// Check if the taint above flows into a field sink
const auto* field =
context->call_graph.resolved_field_access(context->method(), instruction);
if (!field) {
WARNING_OR_DUMP(
context,
3,
"Unable to resolve access of field for iput {}",
show(instruction->get_field()));
} else {
auto field_model = context->registry.get(field);
auto sinks = field_model.sinks();
if (!sinks.empty() && !taint.is_bottom()) {
for (const auto& [port, sources] : taint.elements()) {
check_flows(
context,
sources,
sinks,
position,
/* extra_features */ FeatureMayAlwaysSet(),
/* fulfilled_partial_sinks */ nullptr);
}
}
}
// Store the taint in the memory location(s) representing the field
auto* field_name = instruction->get_field()->get_name();
auto target_memory_locations =
environment->memory_locations(/* register */ instruction->srcs()[1]);
bool is_singleton = target_memory_locations.elements().size() == 1;
for (auto* memory_location : target_memory_locations.elements()) {
auto field_memory_location = memory_location->make_field(field_name);
auto taint_copy = taint;
add_field_features(context, taint_copy, field_memory_location);
LOG_OR_DUMP(
context,
4,
"Tainting {} with {}",
show(field_memory_location),
taint_copy);
environment->write(
field_memory_location,
taint_copy,
is_singleton ? UpdateKind::Strong : UpdateKind::Weak);
}
analyze_artificial_calls(context, instruction, environment);
return false;
}
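// Static field write: check flows from the written value into the sinks of
// the field's model.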
bool Transfer::analyze_sput(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
mt_assert(instruction->srcs().size() == 1);
mt_assert(instruction->has_field());
auto taint = environment->read(/* register */ instruction->srcs()[0]);
if (taint.is_bottom()) {
return false;
}
auto* position = context->positions.get(
context->method(),
environment->last_position(),
Root(Root::Kind::Return),
instruction);
taint.map(
[position](Taint& sources) { sources.add_local_position(position); });
const auto* field =
context->call_graph.resolved_field_access(context->method(), instruction);
if (!field) {
WARNING_OR_DUMP(
context,
3,
"Unable to resolve access of field for sput {}",
show(instruction->get_field()));
return false;
}
auto field_model = context->registry.get(field);
auto sinks = field_model.sinks();
if (sinks.empty()) {
return false;
}
for (const auto& [port, sources] : taint.elements()) {
check_flows(
context,
sources,
sinks,
position,
/* extra_features */ FeatureMayAlwaysSet(),
/* fulfilled_partial_sinks */ nullptr);
}
return false;
}
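// Bind the destination register to the parameter's memory location, apply
// user-declared parameter sources, and introduce an artificial source so
// that sinks and propagations can be inferred for the parameter.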
bool Transfer::analyze_load_param(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
auto abstract_parameter = environment->last_parameter_loaded();
if (!abstract_parameter.is_value()) {
ERROR_OR_DUMP(context, 1, "Failed to deduce the parameter of a load");
return false;
}
auto parameter_position = *abstract_parameter.get_constant();
environment->increment_last_parameter_loaded();
// Create a memory location that represents the argument.
auto memory_location =
context->memory_factory.make_parameter(parameter_position);
LOG_OR_DUMP(
context,
4,
"Setting register {} to {}",
instruction->dest(),
show(memory_location));
environment->assign(instruction->dest(), memory_location);
// Add parameter sources specified in model generators.
auto root = Root(Root::Kind::Argument, parameter_position);
auto taint = context->model.parameter_sources().read(root);
// Add the position of the instruction to the parameter sources.
auto* position = context->positions.get(context->method());
taint.map([position](Taint& sources) {
sources = sources.attach_position(position);
});
// Introduce an artificial parameter source in order to infer sinks and
// propagations.
taint.write(
Path{},
Taint{Frame::artificial_source(AccessPath(root))},
UpdateKind::Weak);
LOG_OR_DUMP(context, 4, "Tainting {} with {}", show(memory_location), taint);
environment->write(memory_location, std::move(taint), UpdateKind::Strong);
return false;
}
bool Transfer::analyze_move(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
mt_assert(instruction->srcs().size() == 1);
auto memory_locations =
environment->memory_locations(/* register */ instruction->srcs()[0]);
LOG_OR_DUMP(
context,
4,
"Setting register {} to {}",
instruction->dest(),
memory_locations);
environment->assign(instruction->dest(), memory_locations);
return false;
}
bool Transfer::analyze_move_result(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
auto memory_locations = environment->memory_locations(k_result_register);
LOG_OR_DUMP(
context,
4,
"Setting register {} to {}",
instruction->dest(),
memory_locations);
environment->assign(instruction->dest(), memory_locations);
LOG_OR_DUMP(context, 4, "Resetting the result register");
environment->assign(k_result_register, MemoryLocationsDomain::bottom());
return false;
}
bool Transfer::analyze_aget(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
mt_assert(instruction->srcs().size() == 2);
// We use a single memory location for the array and its elements.
auto memory_locations =
environment->memory_locations(/* register */ instruction->srcs()[0]);
LOG_OR_DUMP(context, 4, "Setting result register to {}", memory_locations);
environment->assign(k_result_register, memory_locations);
return false;
}
bool Transfer::analyze_aput(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
mt_assert(instruction->srcs().size() == 3);
auto taint = environment->read(
/* register */ instruction->srcs()[0]);
auto features =
FeatureMayAlwaysSet::make_always({context->features.get("via-array")});
auto* position = context->positions.get(
context->method(),
environment->last_position(),
Root(Root::Kind::Return),
instruction);
taint.map([&features, position](Taint& sources) {
sources.add_inferred_features_and_local_position(features, position);
});
// We use a single memory location for the array and its elements.
auto target_memory_locations =
environment->memory_locations(/* register */ instruction->srcs()[1]);
for (auto* memory_location : target_memory_locations.elements()) {
LOG_OR_DUMP(
context, 4, "Tainting {} with {}", show(memory_location), taint);
environment->write(memory_location, taint, UpdateKind::Weak);
}
return false;
}
bool Transfer::analyze_new_array(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
check_flows_to_array_allocation(context, environment, instruction);
return analyze_default(context, instruction, environment);
}
bool Transfer::analyze_filled_new_array(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
check_flows_to_array_allocation(context, environment, instruction);
return analyze_default(context, instruction, environment);
}
namespace {
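// Common handler for unary and binary numeric operations: join the taint of
// all operands, add the via-numerical-operator feature, and write the result
// to a fresh memory location.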
bool analyze_numerical_operator(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
TaintTree taint;
for (auto register_id : instruction->srcs()) {
taint.join_with(environment->read(register_id));
}
auto features = FeatureMayAlwaysSet::make_always(
{context->features.get("via-numerical-operator")});
auto* position = context->positions.get(
context->method(),
environment->last_position(),
Root(Root::Kind::Return),
instruction);
taint.map([&features, position](Taint& sources) {
sources.add_inferred_features_and_local_position(features, position);
});
// Assume the instruction creates a new memory location.
auto memory_location = context->memory_factory.make_location(instruction);
if (instruction->has_dest()) {
LOG_OR_DUMP(
context,
4,
"Setting register {} to {}",
instruction->dest(),
show(memory_location));
environment->assign(instruction->dest(), memory_location);
} else if (instruction->has_move_result_any()) {
LOG_OR_DUMP(
context, 4, "Setting result register to {}", show(memory_location));
environment->assign(k_result_register, memory_location);
} else {
return false;
}
LOG_OR_DUMP(context, 4, "Tainting {} with {}", show(memory_location), taint);
environment->write(memory_location, taint, UpdateKind::Strong);
return false;
}
} // namespace
bool Transfer::analyze_unop(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
return analyze_numerical_operator(context, instruction, environment);
}
bool Transfer::analyze_binop(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
return analyze_numerical_operator(context, instruction, environment);
}
bool Transfer::analyze_binop_lit(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
return analyze_numerical_operator(context, instruction, environment);
}
namespace {
// Infer propagations and generations for the output `taint` on port `root`.
void infer_output_taint(
MethodContext* context,
Root root,
const TaintTree& taint) {
for (const auto& [path, sources] : taint.elements()) {
auto partitioned_by_artificial_sources = sources.partition_by_kind<bool>(
[&](const Kind* kind) { return kind == Kinds::artificial_source(); });
auto real_sources = partitioned_by_artificial_sources.find(false);
if (real_sources != partitioned_by_artificial_sources.end()) {
for (const auto& source : real_sources->second) {
auto generation = source;
generation.add_inferred_features(FeatureMayAlwaysSet::make_always(
context->model.attach_to_sources(root)));
auto port = AccessPath(root, path);
LOG_OR_DUMP(
context,
4,
"Inferred generation for port {}: {}",
port,
generation);
context->model.add_inferred_generations(
std::move(port), Taint{std::move(generation)});
}
}
auto artificial_sources = partitioned_by_artificial_sources.find(true);
if (artificial_sources != partitioned_by_artificial_sources.end()) {
for (const auto& source : artificial_sources->second) {
for (const auto& artificial_source : source) {
if (artificial_source.callee_port().root() != root) {
const auto& input = artificial_source.callee_port();
auto output = AccessPath(root, path);
auto features = artificial_source.features();
features.add_always(
context->model.attach_to_propagations(input.root()));
features.add_always(context->model.attach_to_propagations(root));
auto propagation = Propagation(
input,
/* inferred_features */ features,
/* user_features */ FeatureSet::bottom());
LOG_OR_DUMP(
context,
4,
"Inferred propagation {} to {}",
propagation,
output);
context->model.add_inferred_propagation(
std::move(propagation), std::move(output));
}
}
}
}
}
}
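// Whether a method item entry may have side effects that the analysis cares
// about. Used to decide whether a method is safe to inline.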
bool has_side_effect(const MethodItemEntry& instruction) {
switch (instruction.type) {
case MFLOW_OPCODE:
switch (instruction.insn->opcode()) {
case IOPCODE_LOAD_PARAM:
case IOPCODE_LOAD_PARAM_OBJECT:
case IOPCODE_LOAD_PARAM_WIDE:
case OPCODE_NOP:
case OPCODE_MOVE:
case OPCODE_MOVE_WIDE:
case OPCODE_MOVE_OBJECT:
case OPCODE_MOVE_RESULT:
case OPCODE_MOVE_RESULT_WIDE:
case OPCODE_MOVE_RESULT_OBJECT:
case IOPCODE_MOVE_RESULT_PSEUDO:
case IOPCODE_MOVE_RESULT_PSEUDO_OBJECT:
case IOPCODE_MOVE_RESULT_PSEUDO_WIDE:
case OPCODE_RETURN_VOID:
case OPCODE_RETURN:
case OPCODE_RETURN_WIDE:
case OPCODE_RETURN_OBJECT:
case OPCODE_CONST:
case OPCODE_CONST_WIDE:
case OPCODE_IGET:
case OPCODE_IGET_WIDE:
case OPCODE_IGET_OBJECT:
case OPCODE_IGET_BOOLEAN:
case OPCODE_IGET_BYTE:
case OPCODE_IGET_CHAR:
case OPCODE_IGET_SHORT:
return false;
default:
return true;
}
case MFLOW_DEBUG:
case MFLOW_POSITION:
case MFLOW_FALLTHROUGH:
return false;
default:
return true;
}
}
// Infer whether the method could be inlined.
AccessPathConstantDomain infer_inline_as(
MethodContext* context,
const MemoryLocationsDomain& memory_locations) {
// Check if we are returning an argument access path.
if (!memory_locations.is_value() || memory_locations.size() != 1 ||
context->model.has_global_propagation_sanitizer()) {
return AccessPathConstantDomain::top();
}
auto* memory_location = *memory_locations.elements().begin();
auto access_path = memory_location->access_path();
if (!access_path) {
return AccessPathConstantDomain::top();
}
LOG_OR_DUMP(
context, 4, "Instruction returns the access path: {}", *access_path);
// Check if the method has any side effect.
const auto* code = context->method()->get_code();
mt_assert(code != nullptr);
const auto& cfg = code->cfg();
if (cfg.blocks().size() != 1) {
// There could be multiple return statements.
LOG_OR_DUMP(
context, 4, "Method has multiple basic blocks, it cannot be inlined.");
return AccessPathConstantDomain::top();
}
auto* entry_block = cfg.entry_block();
auto found =
std::find_if(entry_block->begin(), entry_block->end(), has_side_effect);
if (found != entry_block->end()) {
LOG_OR_DUMP(
context,
4,
"Method has an instruction with possible side effects: {}, it cannot be inlined.",
show(*found));
return AccessPathConstantDomain::top();
}
LOG_OR_DUMP(context, 4, "Method can be inlined as {}", *access_path);
return AccessPathConstantDomain(*access_path);
}
} // namespace
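// Transfer function for return instructions: check flows into return sinks,
// infer generations and propagations for the returned value (and for `this`
// on instance methods), and record whether the method can be inlined.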
bool Transfer::analyze_return(
MethodContext* context,
const IRInstruction* instruction,
AnalysisEnvironment* environment) {
log_instruction(context, instruction);
auto return_sinks = context->model.sinks().read(Root(Root::Kind::Return));
// Add the position of the instruction to the return sinks.
auto* position =
context->positions.get(context->method(), environment->last_position());
return_sinks.map(
[position](Taint& sinks) { sinks = sinks.attach_position(position); });
for (auto register_id : instruction->srcs()) {
auto memory_locations = environment->memory_locations(register_id);
context->model.set_inline_as(infer_inline_as(context, memory_locations));
infer_output_taint(
context, Root(Root::Kind::Return), environment->read(memory_locations));
for (const auto& [path, sinks] : return_sinks.elements()) {
Taint sources = environment->read(register_id, path).collapse();
// Fulfilled partial sinks are not expected to be produced here. Return
// sinks are never partial.
check_flows(
context,
sources,
sinks,
position,
/* extra_features */ {},
/* fulfilled_partial_sinks */ nullptr);
}
}
if (!context->method()->is_static()) {
infer_output_taint(
context,
Root(Root::Kind::Argument, 0),
environment->read(context->memory_factory.make_parameter(0)));
}
return false;
}
} // namespace marianatrench