source/TaintV2.h (134 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <initializer_list>
#include <ostream>
#include <json/json.h>
#include <AbstractDomain.h>
#include <mariana-trench/CalleeFrames.h>
#include <mariana-trench/Frame.h>
#include <mariana-trench/GroupHashedSetAbstractDomain.h>
namespace marianatrench {
/**
 * Represents an abstract taint.
 *
 * The taint is stored as a `GroupHashedSetAbstractDomain` of `CalleeFrames`.
 * Grouping is keyed on the callee only: `GroupEqual` and `GroupHash` look at
 * nothing but `CalleeFrames::callee()`.
 *
 * Replacement of `Taint`.
 */
class TaintV2 final : public sparta::AbstractDomain<TaintV2> {
 private:
  /* Two `CalleeFrames` belong to the same group iff they share a callee. */
  struct GroupEqual {
    bool operator()(const CalleeFrames& left, const CalleeFrames& right) const {
      return left.callee() == right.callee();
    }
  };

  /* Hash consistent with `GroupEqual`: only the callee pointer is hashed. */
  struct GroupHash {
    std::size_t operator()(const CalleeFrames& frame) const {
      return std::hash<const Method*>()(frame.callee());
    }
  };

  /* In-place difference of two `CalleeFrames`, delegated to the element. */
  struct GroupDifference {
    void operator()(CalleeFrames& left, const CalleeFrames& right) const {
      left.difference_with(right);
    }
  };

  using Set = GroupHashedSetAbstractDomain<
      CalleeFrames,
      GroupHash,
      GroupEqual,
      GroupDifference>;

 public:
  /* Create the bottom (i.e., empty) taint. */
  TaintV2() = default;

  /* Create a taint from the given list of frames (defined out of line). */
  explicit TaintV2(std::initializer_list<Frame> frames);

  TaintV2(const TaintV2&) = default;
  TaintV2(TaintV2&&) = default;
  TaintV2& operator=(const TaintV2&) = default;
  TaintV2& operator=(TaintV2&&) = default;

  /* The bottom element is the default-constructed taint. */
  static TaintV2 bottom() {
    return TaintV2();
  }

  /* There is no top element: calling this hits `mt_unreachable()`. */
  static TaintV2 top() {
    mt_unreachable(); // Not implemented.
  }

  /* The lattice state queries and setters below forward to the underlying set. */
  bool is_bottom() const override {
    return set_.is_bottom();
  }

  bool is_top() const override {
    return set_.is_top();
  }

  void set_to_bottom() override {
    set_.set_to_bottom();
  }

  void set_to_top() override {
    set_.set_to_top();
  }

  /* Size of the underlying set, as reported by `Set::size()`. */
  std::size_t size() const {
    return set_.size();
  }

  /* Whether the underlying set is empty, as reported by `Set::empty()`. */
  bool empty() const {
    return set_.empty();
  }

  /* Add a single frame to this taint (defined out of line). */
  void add(const Frame& frame);

  /* Clear the underlying set. */
  void clear() {
    set_.clear();
  }

  /* Abstract domain operations (defined out of line). */
  bool leq(const TaintV2& other) const override;

  bool equals(const TaintV2& other) const override;

  void join_with(const TaintV2& other) override;

  void widen_with(const TaintV2& other) override;

  void meet_with(const TaintV2& other) override;

  void narrow_with(const TaintV2& other) override;

  void difference_with(const TaintV2& other);

  void add_inferred_features(const FeatureMayAlwaysSet& features);

  void add_local_position(const Position* position);

  void set_local_positions(const LocalPositionSet& positions);

  /* Note that `position` is the only nullable argument here. */
  void add_inferred_features_and_local_position(
      const FeatureMayAlwaysSet& features,
      const Position* MT_NULLABLE position);

  /**
   * Propagate the taint from the callee to the caller.
   *
   * Return bottom if the taint should not be propagated.
   */
  TaintV2 propagate(
      const Method* callee,
      const AccessPath& callee_port,
      const Position* call_position,
      int maximum_source_sink_distance,
      const FeatureMayAlwaysSet& extra_features,
      Context& context,
      const std::vector<const DexType * MT_NULLABLE>& source_register_types,
      const std::vector<std::optional<std::string>>& source_constant_arguments)
      const;

  /* Return the set of leaf frames with the given position. */
  TaintV2 attach_position(const Position* position) const;

  /**
   * Transforms kinds in the taint according to the function in the first arg.
   * Returning an empty vec will cause frames for the input kind to be dropped.
   * If a transformation occurs (returns more than a vector containing just the
   * input kind), locally inferred features can be added to the frames of the
   * transformed kinds (return `bottom()` to add nothing).
   *
   * If multiple kinds map to the same kind, their respective frames will be
   * joined. This means "always" features could turn into "may" features. At
   * time of writing, there should be no such use-case, but new callers should
   * be mindful of this behavior.
   */
  TaintV2 transform_kind_with_features(
      const std::function<std::vector<const Kind*>(const Kind*)>&,
      const std::function<FeatureMayAlwaysSet(const Kind*)>&) const;

  /**
   * Appends `path_element` to the callee ports of all kinds that pass `filter`
   * (returns true).
   */
  void append_callee_port(
      Path::Element path_element,
      const std::function<bool(const Kind*)>& filter);

  /**
   * Update call and local positions of all non-leaf frames.
   * `new_call_position` is given callee, callee_port and (existing) position.
   * `new_local_positions` is given existing local positions.
   */
  void update_non_leaf_positions(
      const std::function<
          const Position*(const Method*, const AccessPath&, const Position*)>&
          new_call_position,
      const std::function<LocalPositionSet(const LocalPositionSet&)>&
          new_local_positions);

  /**
   * Drops frames that are considered invalid.
   * `is_valid` is given callee (nullptr for leaves), callee_port, kind.
   */
  void filter_invalid_frames(
      const std::function<
          bool(const Method* MT_NULLABLE, const AccessPath&, const Kind*)>&
          is_valid);

  /**
   * Returns true if any frame contains the given kind.
   */
  bool contains_kind(const Kind*) const;

  /**
   * Returns a map of `Kind` -> `Taint`, where each `Taint` value contains only
   * the frames with the `Kind` in its key.
   */
  std::unordered_map<const Kind*, TaintV2> partition_by_kind() const;

  /**
   * Similar to `partition_by_kind()` but caller gets to decide what value of
   * type `T` each kind maps to.
   *
   * Each `CalleeFrames` in this taint is partitioned with `map_kind`, and the
   * resulting pieces are accumulated per mapped value into the returned map.
   * `T` must be usable as a key of `std::unordered_map`.
   */
  template <class T>
  std::unordered_map<T, TaintV2> partition_by_kind(
      const std::function<T(const Kind*)>& map_kind) const {
    std::unordered_map<T, TaintV2> result;
    for (const auto& callee_frames : set_) {
      auto callee_frames_partitioned =
          callee_frames.partition_by_kind(map_kind);

      for (const auto& [mapped_value, partitioned_frames] :
           callee_frames_partitioned) {
        // `operator[]` default-constructs a missing entry, and a
        // default-constructed `TaintV2` is bottom, so we can add in place
        // instead of copying the accumulated taint out and back in.
        result[mapped_value].add(partitioned_frames);
      }
    }
    return result;
  }

  /**
   * Returns all features for this taint tree, joined as `FeatureMayAlwaysSet`.
   */
  FeatureMayAlwaysSet features_joined() const;

 private:
  /* Add a whole group of frames at once (defined out of line). */
  void add(const CalleeFrames& frames);

  /* Apply `f` to the `CalleeFrames` of this taint (defined out of line). */
  void map(const std::function<void(CalleeFrames&)>& f);

 private:
  Set set_;
};
} // namespace marianatrench