source/Access.h (226 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <initializer_list>
#include <limits>
#include <optional>
#include <vector>
#include <boost/functional/hash.hpp>
#include <json/json.h>
#include <ConstantAbstractDomain.h>
#include <DexClass.h>
#include <IRInstruction.h>
#include <mariana-trench/Assert.h>
#include <mariana-trench/Compiler.h>
namespace marianatrench {
// Forward declaration: this header only refers to `Method` through pointers.
class Method;
/* Integer type representing a register number. */
using Register = std::uint32_t;
// This should match with the type `reg_t` used in Redex.
static_assert(std::is_same_v<Register, reg_t>, "type mismatch");
/* Integer type representing a parameter number. */
using ParameterPosition = std::uint32_t;
/*
 * Parses `string` as a parameter position. Defined out of line.
 * NOTE(review): presumably yields `std::nullopt` when `string` is not a valid
 * position -- confirm against the implementation.
 */
std::optional<ParameterPosition> parse_parameter_position(
const std::string& string);
/**
 * The part of an access path that follows the root, e.g. `x.y.z`.
 *
 * Elements are `const DexString*` values kept in order in a `std::vector`.
 * Iteration is read-only: only const iterators are exposed.
 */
class Path final {
 public:
  using Element = const DexString*;
  using ConstIterator = std::vector<Element>::const_iterator;

  // Member types expected by the C++ container concept.
  using value_type = Element;
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;
  using const_reference = const Element&;
  using const_pointer = const Element*;
  using const_iterator = ConstIterator;
  using iterator = ConstIterator;

  /* Creates an empty path. */
  Path() = default;

  /* Creates a path holding `elements`, in the given order. */
  explicit Path(std::initializer_list<Element> elements)
      : elements_(elements) {}

  Path(const Path&) = default;
  Path(Path&&) = default;
  Path& operator=(const Path&) = default;
  Path& operator=(Path&&) = default;
  ~Path() = default;

  bool operator==(const Path& other) const;

  // Read-only element access.

  /* Number of elements in the path. */
  std::size_t size() const {
    return elements_.size();
  }

  /* True when the path has no element. */
  bool empty() const {
    return elements_.empty();
  }

  ConstIterator begin() const {
    return elements_.begin();
  }

  ConstIterator end() const {
    return elements_.end();
  }

  // Mutators, defined out of line.
  // NOTE(review): semantics are inferred from the names only -- confirm
  // against the implementation before relying on edge-case behavior.

  void append(Element element);
  void extend(const Path& path);
  void pop_back();
  void truncate(std::size_t max_size);

  // Prefix operations, defined out of line.

  bool is_prefix_of(const Path& other) const;
  void reduce_to_common_prefix(const Path& other);

 private:
  friend std::ostream& operator<<(std::ostream& out, const Path& path);

  std::vector<Element> elements_;
};
} // namespace marianatrench
/**
 * Hash support for `marianatrench::Path`.
 *
 * Starts from zero and folds every element, in iteration order, into the
 * result with `boost::hash_combine`. Elements are passed as the
 * `const DexString*` pointers the path stores.
 */
template <>
struct std::hash<marianatrench::Path> {
  std::size_t operator()(const marianatrench::Path& path) const {
    std::size_t result = 0;
    for (marianatrench::Path::Element element : path) {
      boost::hash_combine(result, element);
    }
    return result;
  }
};
namespace marianatrench {
/**
 * Represents the root of an access path.
 *
 * A root is stored as a single `IntegerEncoding`. Values up to and including
 * `Kind::MaxArgument` are argument positions; every other kind (return value,
 * leaf, anchor, producer, canonical this) owns one reserved value at the top
 * of the integer range.
 */
class Root final {
 public:
  using IntegerEncoding = ParameterPosition;

  enum class Kind : IntegerEncoding {
    Argument = 0,
    Return = std::numeric_limits<IntegerEncoding>::max(),
    /* When used as a callee port of a `Frame`, it represents a leaf frame. */
    Leaf = std::numeric_limits<IntegerEncoding>::max() - 1,
    /*
     * When used as a callee port of a `Frame`, `Anchor` and `Producer` are used
     * as "connection points" where data flows into another codebase, e.g.:
     * GraphQL, native. Information about these will be output to CRTEX. They
     * mark connection points with sources/sinks that flow to/from another
     * codebase. `Anchor` is for those where Mariana Trench detected the flow
     * and will output to CRTEX. `Producer` is for those detected by another
     * analyzer and then read as input by Mariana Trench.
     */
    Anchor = std::numeric_limits<IntegerEncoding>::max() - 2,
    Producer = std::numeric_limits<IntegerEncoding>::max() - 3,
    /*
     * In CRTEX, "this" argument, represented by argument(0) in Mariana Trench,
     * has index -1 in other codebases. This cannot be represented by the
     * unsigned encoding, so use a special kind. In the analysis,
     * `CanonicalThis` is not considered an argument.
     */
    CanonicalThis = std::numeric_limits<IntegerEncoding>::max() - 4,
    /* Largest encoding that `is_argument()` treats as an argument position. */
    MaxArgument = std::numeric_limits<IntegerEncoding>::max() - 5,
  };

 private:
  /* Wraps an already encoded value as-is; used by `decode()`. */
  explicit Root(IntegerEncoding value) : value_(value) {}

 public:
  /* Default constructor required by sparta, do not use. */
  explicit Root() : value_(static_cast<IntegerEncoding>(Kind::Return)) {}

  /**
   * Builds a root of the given kind.
   *
   * `parameter_position` is only read when `kind` is `Kind::Argument`, and
   * must then be at most `Kind::MaxArgument`. For any other kind the encoding
   * is the enumerator's own value.
   */
  explicit Root(Kind kind, ParameterPosition parameter_position = 0) {
    if (kind == Kind::Argument) {
      // Positions above `MaxArgument` are the encodings reserved for the other
      // kinds. Storing one would silently yield a non-argument root (e.g. the
      // maximum integer would compare equal to `Return`), so reject it.
      mt_assert(
          parameter_position <=
          static_cast<IntegerEncoding>(Kind::MaxArgument));
      value_ = parameter_position;
    } else {
      value_ = static_cast<IntegerEncoding>(kind);
    }
  }

  Root(const Root&) = default;
  Root(Root&&) = default;
  Root& operator=(const Root&) = default;
  Root& operator=(Root&&) = default;
  ~Root() = default;

  bool operator==(const Root& other) const {
    return value_ == other.value_;
  }

  bool operator!=(const Root& other) const {
    return value_ != other.value_;
  }

  /* True when the encoding lies in the argument range `[0, MaxArgument]`. */
  bool is_argument() const {
    return value_ <= static_cast<IntegerEncoding>(Kind::MaxArgument);
  }

  bool is_return() const {
    return value_ == static_cast<IntegerEncoding>(Kind::Return);
  }

  bool is_leaf() const {
    return value_ == static_cast<IntegerEncoding>(Kind::Leaf);
  }

  bool is_anchor() const {
    return value_ == static_cast<IntegerEncoding>(Kind::Anchor);
  }

  bool is_producer() const {
    return value_ == static_cast<IntegerEncoding>(Kind::Producer);
  }

  /* Is it used as callee port for a leaf frame? */
  bool is_leaf_port() const {
    switch (kind()) {
      case Kind::Leaf:
      case Kind::Anchor:
      case Kind::Producer:
        return true;
      default:
        return false;
    }
  }

  /*
   * Kind of this root. Every argument position maps to `Kind::Argument`;
   * other encodings are reinterpreted directly as the enumerator.
   */
  Kind kind() const {
    if (is_argument()) {
      return Kind::Argument;
    } else {
      return static_cast<Kind>(value_);
    }
  }

  /* Position of the argument. Asserts that the root is an argument. */
  ParameterPosition parameter_position() const {
    mt_assert(is_argument());
    return value_;
  }

  /* Raw integer encoding; `decode()` is the inverse. */
  IntegerEncoding encode() const {
    return value_;
  }

  std::string to_string() const;

  /* Rebuilds a root from a value returned by `encode()`. Not validated. */
  static Root decode(IntegerEncoding value) {
    return Root(value);
  }

  static Root from_json(const Json::Value& value);
  Json::Value to_json() const;

 private:
  friend std::ostream& operator<<(std::ostream& out, const Root& root);

 private:
  // If the root is a parameter, this is the parameter position.
  // Otherwise, this is the value of the corresponding `Kind` enumerator, all
  // of which sit at the top of the integer range (`Return` is the biggest
  // integer).
  // Note that `RootPatriciaTreeAbstractPartition` relies on this encoding.
  IntegerEncoding value_;
};
} // namespace marianatrench
/**
 * Hash support for `marianatrench::Root`.
 *
 * The hash is the root's integer encoding, widened to `std::size_t`.
 */
template <>
struct std::hash<marianatrench::Root> {
  std::size_t operator()(const marianatrench::Root& root) const {
    return static_cast<std::size_t>(root.encode());
  }
};
namespace marianatrench {
/**
 * An access path: a `Root` followed by a `Path`.
 */
class AccessPath final {
 public:
  /* Default constructor required by sparta, do not use. */
  explicit AccessPath() = default;

  /* Builds the access path `root` + `path`; the path defaults to empty. */
  explicit AccessPath(Root root, Path path = {})
      : root_(root), path_(std::move(path)) {}

  AccessPath(const AccessPath&) = default;
  AccessPath(AccessPath&&) = default;
  AccessPath& operator=(const AccessPath&) = default;
  AccessPath& operator=(AccessPath&&) = default;
  ~AccessPath() = default;

  // Comparison and lattice-style operations, defined out of line.
  // NOTE(review): the ordering used by `leq` and the result of `join_with`
  // are not visible from this header -- confirm against the implementation.

  bool operator==(const AccessPath& other) const;
  bool leq(const AccessPath& other) const;
  void join_with(const AccessPath& other);

  // Accessors.

  /* Returns a copy of the root. */
  Root root() const {
    return root_;
  }

  /* Returns the path that follows the root. */
  const Path& path() const {
    return path_;
  }

  // Path mutators: each one forwards to the `Path` member function of the
  // same name and leaves the root untouched.

  void append(Path::Element element) {
    path_.append(element);
  }

  void extend(const Path& path) {
    path_.extend(path);
  }

  void pop_back() {
    path_.pop_back();
  }

  void truncate(std::size_t max_size) {
    path_.truncate(max_size);
  }

  /**
   * Produces the canonical port for `method`, used (alongside
   * canonical_names) for CRTEX.
   *
   * The result is compatible with other analyses and has the form
   * "Anchor:Argument(x)", i.e. two roots: `Anchor` is stored as the root
   * while "Argument(x)" is stored in the Path.
   */
  AccessPath canonicalize_for_method(const Method* method) const;

  /**
   * Splits a json string into the elements of an access path.
   *
   * Example:
   * ```
   * >>> split_path(Json::Value("Return.x.y"));
   * <<< ["Return", "x", "y"]
   * ```
   *
   * Throws a `JsonValidationError` if the format is invalid.
   */
  static std::vector<std::string> split_path(const Json::Value& value);

  /**
   * Parses a json string into an access path.
   *
   * The accepted syntax is the one shown for `split_path`.
   */
  static AccessPath from_json(const Json::Value& value);

  Json::Value to_json() const;

 private:
  friend std::ostream& operator<<(
      std::ostream& out,
      const AccessPath& access_path);

  Root root_;
  Path path_;
};
} // namespace marianatrench
/**
 * Hash support for `marianatrench::AccessPath`.
 *
 * Combines the hash of the root, then the hash of the path, into a result
 * that starts at zero.
 */
template <>
struct std::hash<marianatrench::AccessPath> {
  std::size_t operator()(const marianatrench::AccessPath& access_path) const {
    const std::size_t root_hash =
        std::hash<marianatrench::Root>()(access_path.root());
    const std::size_t path_hash =
        std::hash<marianatrench::Path>()(access_path.path());

    std::size_t result = 0;
    boost::hash_combine(result, root_hash);
    boost::hash_combine(result, path_hash);
    return result;
  }
};
namespace marianatrench {
/**
 * Represents the access path constant abstract domain.
 *
 * This is either bottom, top or an access path.
 *
 * Alias for sparta's `ConstantAbstractDomain` instantiated with `AccessPath`.
 */
using AccessPathConstantDomain = sparta::ConstantAbstractDomain<AccessPath>;
} // namespace marianatrench