source/Frame.h (242 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <optional>
#include <ostream>
#include <json/json.h>
#include <AbstractDomain.h>
#include <HashedSetAbstractDomain.h>
#include <mariana-trench/Access.h>
#include <mariana-trench/Assert.h>
#include <mariana-trench/CanonicalName.h>
#include <mariana-trench/Compiler.h>
#include <mariana-trench/Context.h>
#include <mariana-trench/Feature.h>
#include <mariana-trench/FeatureMayAlwaysSet.h>
#include <mariana-trench/FeatureSet.h>
#include <mariana-trench/FieldSet.h>
#include <mariana-trench/Kind.h>
#include <mariana-trench/Kinds.h>
#include <mariana-trench/LocalPositionSet.h>
#include <mariana-trench/Method.h>
#include <mariana-trench/MethodSet.h>
#include <mariana-trench/Position.h>
namespace marianatrench {
// Abstract domain over a hashed set of `Root`s. `Frame` uses it for its
// `via_type_of_ports` and `via_value_of_ports` members.
using RootSetAbstractDomain = sparta::HashedSetAbstractDomain<Root>;
// Abstract domain over a hashed set of `CanonicalName`s. `Frame` uses it for
// its `canonical_names` member.
using CanonicalNameSetAbstractDomain =
sparta::HashedSetAbstractDomain<CanonicalName>;
/**
* Represents a frame of a trace, i.e. a single hop between methods.
*
* The `kind` is the label of the taint, e.g. "UserInput".
*
* The `callee_port` is the port to the next method in the trace, or
* `Root::Kind::Leaf` for a leaf frame.
*
* `callee` is the next method in the trace. This is `nullptr` for a leaf frame.
*
* `field_callee` is set only if this frame is a frame within a field model or
* if it represents taint in a method model that came from a field access. Both
* callee and field_callee should never be set in one frame.
*
* `call_position` is the position of the call to the `callee`. This is
* `nullptr` for a leaf frame. This can be non-null for leaf frames inside
* issues, to describe the position of a parameter source or return sink.
*
* `distance` is the shortest length of the trace, i.e. from this frame to the
* closest leaf. This is `0` for a leaf frame.
*
* `origins` is the set of methods that originated the taint. This is the
* union of all methods at the end of the trace, i.e. the leaves.
*
* `features` is a set of tags used to give extra information about the trace.
* For instance, "via-numerical-operator" could be used to express that the
* trace goes through a numerical operator. Internally, this is represented by:
* inferred_features:
* Features propagated into this frame, usually from its callee.
* locally_inferred_features:
* Features inferred within this frame (not propagated from a callee).
* These features are only ever added after frame creation.
* user_features:
* User-defined features from a JSON.
*
* `via_type_of_ports` is a set of ports for each of which we would like to
* materialize a 'via-type-of' feature with the type of the port seen at a
* callsite and include it in the inferred features of the taint at that
* callsite.
* `via_value_of_ports` is a set of ports for each of which we would like to
* materialize a 'via-value-of' feature with the value of the port seen at a
* callsite and include it in the inferred features of the taint at that
* callsite.
*
* `local_positions` is the set of positions that the taint flowed through
* within the current method.
*
* `canonical_names` is used for cross-repo taint exchange (crtex) which
* requires that callee names at the leaves conform to a naming format. This
* format is defined using placeholders. See `CanonicalName`.
*
* For artificial sources, the callee port is used as the origin of the source.
*/
class Frame final : public sparta::AbstractDomain<Frame> {
 public:
  /**
   * Create the bottom frame.
   *
   * Every raw pointer member (including `field_callee_`) is explicitly set to
   * `nullptr`, so that copying a bottom frame or calling its accessors never
   * reads an indeterminate pointer value.
   */
  explicit Frame()
      : kind_(nullptr),
        callee_port_(Root(Root::Kind::Leaf)),
        callee_(nullptr),
        field_callee_(nullptr),
        call_position_(nullptr),
        distance_(0) {}

  /**
   * Create a non-bottom frame from all of its components.
   *
   * All set-like arguments are taken by value and moved into the frame.
   *
   * Asserts that:
   *   - `kind` is not `nullptr` (a null kind denotes bottom);
   *   - `distance` is non-negative;
   *   - `local_positions` is not bottom;
   *   - `callee` and `field_callee` are not both set.
   */
  explicit Frame(
      const Kind* kind,
      AccessPath callee_port,
      const Method* MT_NULLABLE callee,
      const Field* MT_NULLABLE field_callee,
      const Position* MT_NULLABLE call_position,
      int distance,
      MethodSet origins,
      FieldSet field_origins,
      FeatureMayAlwaysSet inferred_features,
      FeatureMayAlwaysSet locally_inferred_features,
      FeatureSet user_features,
      RootSetAbstractDomain via_type_of_ports,
      RootSetAbstractDomain via_value_of_ports,
      LocalPositionSet local_positions,
      CanonicalNameSetAbstractDomain canonical_names)
      : kind_(kind),
        callee_port_(std::move(callee_port)),
        callee_(callee),
        field_callee_(field_callee),
        call_position_(call_position),
        distance_(distance),
        origins_(std::move(origins)),
        field_origins_(std::move(field_origins)),
        inferred_features_(std::move(inferred_features)),
        locally_inferred_features_(std::move(locally_inferred_features)),
        user_features_(std::move(user_features)),
        via_type_of_ports_(std::move(via_type_of_ports)),
        via_value_of_ports_(std::move(via_value_of_ports)),
        local_positions_(std::move(local_positions)),
        canonical_names_(std::move(canonical_names)) {
    mt_assert(kind_ != nullptr);
    mt_assert(distance_ >= 0);
    mt_assert(!local_positions_.is_bottom());
    mt_assert(!(callee && field_callee));
  }

  /**
   * Create a leaf frame with the given kind, features and origins.
   *
   * The resulting frame has a `Root::Kind::Leaf` callee port, no callee, no
   * field callee, no call position and a distance of `0`. Field origins,
   * via-type-of ports, via-value-of ports, local positions and canonical
   * names are initialized from `{}`.
   *
   * The by-value arguments are moved (not copied) into the new frame.
   */
  static Frame leaf(
      const Kind* kind,
      FeatureMayAlwaysSet inferred_features,
      FeatureMayAlwaysSet locally_inferred_features,
      FeatureSet user_features,
      MethodSet origins) {
    return Frame(
        kind,
        /* callee_port */ AccessPath(Root(Root::Kind::Leaf)),
        /* callee */ nullptr,
        /* field_callee */ nullptr,
        /* call_position */ nullptr,
        /* distance */ 0,
        std::move(origins),
        /* field origins */ {},
        std::move(inferred_features),
        std::move(locally_inferred_features),
        std::move(user_features),
        /* via_type_of_ports */ {},
        /* via_value_of_ports */ {},
        /* local_positions */ {},
        /* canonical_names */ {});
  }

  /**
   * Create a leaf frame with the given kind, bottom feature sets and origins
   * initialized from `{}`.
   */
  static Frame leaf(const Kind* kind) {
    return Frame::leaf(
        kind,
        /* inferred_features */ FeatureMayAlwaysSet::bottom(),
        /* locally_inferred_features */ FeatureMayAlwaysSet::bottom(),
        /* user_features */ FeatureSet::bottom(),
        /* origins */ {});
  }

  /**
   * Create a leaf frame for cross-repo taint exchange (crtex).
   *
   * Asserts that the root of `callee_port` is either an anchor or a producer.
   * The frame has no callee, no field callee, no call position, a distance of
   * `0` and bottom inferred/locally inferred features.
   *
   * The by-value arguments are moved (not copied) into the new frame.
   */
  static Frame crtex_leaf(
      const Kind* kind,
      AccessPath callee_port,
      CanonicalNameSetAbstractDomain canonical_names) {
    // Must be checked before `callee_port` is moved from below.
    mt_assert(
        callee_port.root().is_anchor() || callee_port.root().is_producer());
    return Frame(
        kind,
        /* callee_port */ std::move(callee_port),
        /* callee */ nullptr,
        /* field_callee */ nullptr,
        /* call_position */ nullptr,
        /* distance */ 0,
        /* origins */ {},
        /* field_origins */ {},
        /* inferred_features */ FeatureMayAlwaysSet::bottom(),
        /* locally_inferred_features */ FeatureMayAlwaysSet::bottom(),
        /* user_features */ {},
        /* via_type_of_ports */ {},
        /* via_value_of_ports */ {},
        /* local_positions */ {},
        std::move(canonical_names));
  }

  Frame(const Frame&) = default;
  Frame(Frame&&) = default;
  Frame& operator=(const Frame&) = default;
  Frame& operator=(Frame&&) = default;

  /* Return the kind, or `nullptr` for bottom. */
  const Kind* MT_NULLABLE kind() const {
    return kind_;
  }

  /* Return the port to the next method in the trace. */
  const AccessPath& callee_port() const {
    return callee_port_;
  }

  /* Return the callee, or `nullptr` if this is a leaf frame. */
  const Method* MT_NULLABLE callee() const {
    return callee_;
  }

  /* Return the field_callee, or `nullptr` if this frame has a method callee or
   * is a leaf. */
  const Field* MT_NULLABLE field_callee() const {
    return field_callee_;
  }

  /* Return the position of the call, or `nullptr` if this is a leaf frame. */
  const Position* MT_NULLABLE call_position() const {
    return call_position_;
  }

  /* Return the distance stored in this frame (`0` for a leaf frame). */
  int distance() const {
    return distance_;
  }

  const RootSetAbstractDomain& via_type_of_ports() const {
    return via_type_of_ports_;
  }

  const RootSetAbstractDomain& via_value_of_ports() const {
    return via_value_of_ports_;
  }

  const CanonicalNameSetAbstractDomain& canonical_names() const {
    return canonical_names_;
  }

  /* Defined out of line. */
  void set_origins(const MethodSet& origins);

  /* Defined out of line. */
  void set_field_origins(const FieldSet& field_origins);

  const MethodSet& origins() const {
    return origins_;
  }

  const FieldSet& field_origins() const {
    return field_origins_;
  }

  /**
   * Despite its name, this adds to locally_inferred_features. Non-local
   * inferred features are used only for frame propagation.
   */
  void add_inferred_features(const FeatureMayAlwaysSet& features);

  const FeatureMayAlwaysSet& inferred_features() const {
    return inferred_features_;
  }

  const FeatureMayAlwaysSet& locally_inferred_features() const {
    return locally_inferred_features_;
  }

  const FeatureSet& user_features() const {
    return user_features_;
  }

  /* Return a combined feature set by value. Defined out of line. */
  FeatureMayAlwaysSet features() const;

  /* Defined out of line. */
  void add_local_position(const Position* position);

  /* Defined out of line. */
  void set_local_positions(LocalPositionSet positions);

  const LocalPositionSet& local_positions() const {
    return local_positions_;
  }

  /* Return the bottom frame, i.e. a default-constructed frame. */
  static Frame bottom() {
    return Frame();
  }

  static Frame top() {
    mt_unreachable(); // Not implemented.
  }

  /* A frame is bottom if and only if its kind is `nullptr`. */
  bool is_bottom() const override {
    return kind_ == nullptr;
  }

  /* There is no top frame, hence this is always `false`. */
  bool is_top() const override {
    return false;
  }

  /* Return `true` if this frame has no (method) callee. Note that
   * `field_callee_` is not consulted. */
  bool is_leaf() const {
    return callee_ == nullptr;
  }

  bool is_crtex_producer_declaration() const {
    // If true, this frame corresponds to the crtex leaf frame declared by
    // the user (callee == nullptr). Also, the producer run declarations use the
    // `Anchor` port, while consumer runs use the `Producer` port.
    return callee_ == nullptr && callee_port_.root().is_anchor();
  }

  /* Turn this frame into bottom. Only `kind_` is reset; the other members
   * keep their current values. */
  void set_to_bottom() override {
    kind_ = nullptr;
  }

  void set_to_top() override {
    mt_unreachable(); // Not implemented.
  }

  /* Abstract domain operations. Defined out of line. */
  bool leq(const Frame& other) const override;
  bool equals(const Frame& other) const override;
  void join_with(const Frame& other) override;
  void widen_with(const Frame& other) override;
  void meet_with(const Frame& other) override;
  void narrow_with(const Frame& other) override;

  /**
   * Return an artificial source for the given parameter.
   *
   * An artificial source is a source used to track the flow of a parameter,
   * to infer sinks and propagations. Instead of relying on a backward analysis,
   * we introduce these artificial sources in the forward analysis. This saves
   * the maintenance cost of having a forward and backward transfer function.
   */
  static Frame artificial_source(ParameterPosition parameter_position);
  static Frame artificial_source(AccessPath access_path);

  /* Return `true` if the kind of this frame is `Kinds::artificial_source()`. */
  bool is_artificial_source() const {
    return kind_ == Kinds::artificial_source();
  }

  /* Append a field to the callee port. Only safe for artificial sources. */
  void callee_port_append(Path::Element path_element);

  /* Return frame with the given kind (and every other field kept the same) */
  Frame with_kind(const Kind* kind) const;

  /* JSON (de)serialization. Defined out of line. */
  static Frame from_json(const Json::Value& value, Context& context);
  Json::Value to_json() const;

  // Describe how to join frames together in `FrameSet`.
  struct GroupEqual {
    bool operator()(const Frame& left, const Frame& right) const;
  };

  // Describe how to join frames together in `FrameSet`.
  struct GroupHash {
    std::size_t operator()(const Frame& frame) const;
  };

  friend std::ostream& operator<<(std::ostream& out, const Frame& frame);

 private:
  // `nullptr` if and only if this frame is bottom.
  const Kind* MT_NULLABLE kind_;
  AccessPath callee_port_;
  // `callee_` and `field_callee_` are never both set (asserted on
  // construction).
  const Method* MT_NULLABLE callee_;
  const Field* MT_NULLABLE field_callee_;
  const Position* MT_NULLABLE call_position_;
  // Non-negative (asserted on construction).
  int distance_;
  MethodSet origins_;
  FieldSet field_origins_;
  FeatureMayAlwaysSet inferred_features_;
  FeatureMayAlwaysSet locally_inferred_features_;
  FeatureSet user_features_;
  RootSetAbstractDomain via_type_of_ports_;
  RootSetAbstractDomain via_value_of_ports_;
  // Never bottom for a frame built by the full constructor (asserted there).
  LocalPositionSet local_positions_;
  CanonicalNameSetAbstractDomain canonical_names_;
};
} // namespace marianatrench