source/TaintV1.h (150 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <initializer_list>
#include <ostream>
#include <json/json.h>
#include <AbstractDomain.h>
#include <mariana-trench/Frame.h>
#include <mariana-trench/FrameSet.h>
#include <mariana-trench/GroupHashedSetAbstractDomain.h>
namespace marianatrench {

/**
 * Represents an abstract taint, as a map from taint kind to set of frames.
 *
 * Frame sets are stored in a `GroupHashedSetAbstractDomain` and are grouped
 * by their kind (see `GroupEqual` and `GroupHash` below).
 */
class TaintV1 final : public sparta::AbstractDomain<TaintV1> {
 private:
  /* Two frame sets are in the same group when they have the same kind. */
  struct GroupEqual {
    bool operator()(const FrameSet& left, const FrameSet& right) const {
      return left.kind() == right.kind();
    }
  };

  /* Hashes a frame set by its kind pointer, consistently with `GroupEqual`. */
  struct GroupHash {
    std::size_t operator()(const FrameSet& frame) const {
      return std::hash<const Kind*>()(frame.kind());
    }
  };

  /* Delegates the in-place difference to `FrameSet::difference_with`. */
  struct GroupDifference {
    void operator()(FrameSet& left, const FrameSet& right) const {
      left.difference_with(right);
    }
  };

  using Set = GroupHashedSetAbstractDomain<
      FrameSet,
      GroupHash,
      GroupEqual,
      GroupDifference>;

 public:
  // C++ container concept member types
  using iterator = typename Set::iterator;
  using const_iterator = typename Set::const_iterator;
  using value_type = FrameSet;
  using difference_type = std::ptrdiff_t;
  using size_type = std::size_t;
  using const_reference = const FrameSet&;
  using const_pointer = const FrameSet*;

 public:
  /* Create the bottom (i.e., empty) taint. */
  TaintV1() = default;

  /* Create a taint from the given frames. */
  explicit TaintV1(std::initializer_list<Frame> frames);

  /* Create a taint from the given frame sets. */
  explicit TaintV1(std::initializer_list<FrameSet> frames);

  TaintV1(const TaintV1&) = default;
  TaintV1(TaintV1&&) = default;
  TaintV1& operator=(const TaintV1&) = default;
  TaintV1& operator=(TaintV1&&) = default;

  /* The bottom taint is the default-constructed one. */
  static TaintV1 bottom() {
    return TaintV1();
  }

  static TaintV1 top() {
    mt_unreachable(); // Not implemented.
  }

  // The bottom/top queries and setters below forward to the underlying set.

  bool is_bottom() const override {
    return set_.is_bottom();
  }

  bool is_top() const override {
    return set_.is_top();
  }

  void set_to_bottom() override {
    set_.set_to_bottom();
  }

  void set_to_top() override {
    set_.set_to_top();
  }

  /* Size of the underlying set, whose elements are frame sets. */
  std::size_t size() const {
    return set_.size();
  }

  bool empty() const {
    return set_.empty();
  }

  /* Iteration yields the stored `FrameSet`s (see `value_type`). */
  const_iterator begin() const {
    return set_.begin();
  }

  const_iterator end() const {
    return set_.end();
  }

  void add(const Frame& frame);

  void add(const FrameSet& frames);

  void clear() {
    set_.clear();
  }

  // Lattice operations overriding `sparta::AbstractDomain`; defined out of
  // line.

  bool leq(const TaintV1& other) const override;

  bool equals(const TaintV1& other) const override;

  void join_with(const TaintV1& other) override;

  void widen_with(const TaintV1& other) override;

  void meet_with(const TaintV1& other) override;

  void narrow_with(const TaintV1& other) override;

  void difference_with(const TaintV1& other);

  void add_inferred_features(const FeatureMayAlwaysSet& features);

  void add_local_position(const Position* position);

  void set_local_positions(const LocalPositionSet& positions);

  void add_inferred_features_and_local_position(
      const FeatureMayAlwaysSet& features,
      const Position* MT_NULLABLE position);

  /**
   * Propagate the taint from the callee to the caller.
   *
   * Return bottom if the taint should not be propagated.
   */
  TaintV1 propagate(
      const Method* caller,
      const Method* callee,
      const AccessPath& callee_port,
      const Position* call_position,
      int maximum_source_sink_distance,
      const FeatureMayAlwaysSet& extra_features,
      Context& context,
      const std::vector<const DexType * MT_NULLABLE>& source_register_types,
      const std::vector<std::optional<std::string>>& source_constant_arguments)
      const;

  /* Return the set of leaf frames with the given position. */
  TaintV1 attach_position(const Position* position) const;

  /**
   * Transforms kinds in the taint according to the function in the first arg.
   * Returning an empty vec will cause frames for the input kind to be dropped.
   * If a transformation occurs (returns more than a vector containing just the
   * input kind), locally inferred features can be added to the frames of the
   * transformed kinds (return `bottom()` to add nothing).
   *
   * If multiple kinds map to the same kind, their respective frames will be
   * joined. This means "always" features could turn into "may" features. At
   * time of writing, there should be no such use-case, but new callers should
   * be mindful of this behavior.
   */
  TaintV1 transform_kind_with_features(
      const std::function<std::vector<const Kind*>(const Kind*)>&,
      const std::function<FeatureMayAlwaysSet(const Kind*)>&) const;

  static TaintV1 from_json(const Json::Value& value, Context& context);

  Json::Value to_json() const;

  friend std::ostream& operator<<(std::ostream& out, const TaintV1& taint);

  // TaintV1 transformation methods by scenario

  /**
   * Appends `path_element` to the callee ports of all kinds that pass `filter`
   * (returns true).
   */
  void append_callee_port(
      Path::Element path_element,
      const std::function<bool(const Kind*)>& filter);

  /**
   * Update call and local positions of all non-leaf frames.
   * `new_call_position` is given callee, callee_port and (existing) position.
   * `new_local_positions` is given existing local positions.
   */
  void update_non_leaf_positions(
      const std::function<
          const Position*(const Method*, const AccessPath&, const Position*)>&
          new_call_position,
      const std::function<LocalPositionSet(const LocalPositionSet&)>&
          new_local_positions);

  /**
   * Drops frames that are considered invalid.
   * `is_valid` is given callee (nullptr for leaves), callee_port, kind.
   */
  void filter_invalid_frames(
      const std::function<
          bool(const Method* MT_NULLABLE, const AccessPath&, const Kind*)>&
          is_valid);

  /**
   * Returns true if any frame contains the given kind.
   */
  bool contains_kind(const Kind*) const;

  /**
   * Returns a map of `Kind` -> `TaintV1`, where each `TaintV1` value contains
   * only the frames with the `Kind` in its key.
   */
  std::unordered_map<const Kind*, TaintV1> partition_by_kind() const;

  /**
   * Similar to `partition_by_kind()` but caller gets to decide what value of
   * type `T` each kind maps to.
   *
   * Frame sets whose kinds map to the same value are added to the same
   * `TaintV1`. `T` is used as the key of a `std::unordered_map`.
   */
  template <class T>
  std::unordered_map<T, TaintV1> partition_by_kind(
      const std::function<T(const Kind*)>& map_kind) const {
    std::unordered_map<T, TaintV1> result;
    for (const auto& frame_set : *this) {
      // `operator[]` default-constructs a bottom taint for a key seen for the
      // first time, so the frame set is added in place instead of copying the
      // accumulated taint out of the map and back in on every iteration.
      result[map_kind(frame_set.kind())].add(frame_set);
    }
    return result;
  }

  /**
   * Returns all features for this taint tree, joined as `FeatureMayAlwaysSet`.
   */
  FeatureMayAlwaysSet features_joined() const;

 private:
  // Internal helpers taking a callback over frame sets; defined out of line.
  void map(const std::function<void(FrameSet&)>& f);

  void filter(const std::function<bool(const FrameSet&)>& predicate);

 private:
  // Underlying storage: frame sets grouped by kind.
  Set set_;
};

} // namespace marianatrench