Jit/runtime.h
// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#pragma once
#include "Jit/containers.h"
#include "Jit/deopt.h"
#include "Jit/fixed_type_profiler.h"
#include "Jit/inline_cache.h"
#include "Jit/jit_rt.h"
#include "Jit/pyjit.h"
#include "Jit/threaded_compile.h"
#include "Jit/type_profiler.h"
#include "Jit/util.h"
#include <optional>
#include <unordered_map>
#include <unordered_set>
namespace jit {
class LoadMethodCachePool {
public:
explicit LoadMethodCachePool(std::size_t num_entries)
: entries_(nullptr), num_entries_(num_entries), num_allocated_(0) {
if (num_entries == 0) {
return;
}
entries_.reset(new JITRT_LoadMethodCache[num_entries]);
for (std::size_t i = 0; i < num_entries_; i++) {
JITRT_InitLoadMethodCache(&(entries_[i]));
}
}
~LoadMethodCachePool() {}
JITRT_LoadMethodCache* AllocateEntry() {
JIT_CHECK(
num_allocated_ < num_entries_,
"not enough space alloc=%lu capacity=%lu",
num_allocated_,
num_entries_);
JITRT_LoadMethodCache* entry = &entries_[num_allocated_];
num_allocated_++;
return entry;
}
private:
DISALLOW_COPY_AND_ASSIGN(LoadMethodCachePool);
std::unique_ptr<JITRT_LoadMethodCache[]> entries_;
std::size_t num_entries_;
std::size_t num_allocated_;
};
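// For illustration, a hedged usage sketch (`pool` and the entry count are
// hypothetical): capacity is fixed at construction time, and entries are
// handed out one at a time during compilation.
//   LoadMethodCachePool pool(/*num_entries=*/2);
//   JITRT_LoadMethodCache* cache = pool.AllocateEntry();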
class GenYieldPoint;
// In a regular JIT function, spill data is stored at negative offsets from
// RBP, and RBP points into the system stack. In JIT generators, spilled data
// is still stored backwards from RBP, but RBP points to a heap-allocated
// block, and this block persists when the generator is suspended.
//
// While the contents of the spill data vary from function to function, we
// also have a few items of data about the current generator that we want to
// access quickly. We can do this via positive offsets from RBP into the
// GenDataFooter struct defined below.
//
// Together the spill data and GenDataFooter make up the complete JIT-specific
// data needed for a generator. PyGenObject::gi_jit_data points to the _top_ of
// the spill data (i.e. at the start of the footer). This allows us to easily
// set RBP to the pointer value on generator resume.
//
// The base address of the complete heap allocated suspend data is:
// PyGenObject::gi_jit_data - GenDataFooter::spill_words
typedef struct _GenDataFooter {
// Tools which examine/walk the stack expect the following two values to be
// ahead of RBP.
uint64_t linkAddress;
uint64_t returnAddress;
// RBP that was swapped out to point to this spill-data.
uint64_t originalRbp;
// Current overall state of the JIT.
_PyJitGenState state;
// Allocated space before this struct in 64-bit words.
size_t spillWords;
// Entry-point to resume a JIT generator.
GenResumeFunc resumeEntry;
// Static data specific to the current yield point. Only non-null when we are
// suspended.
GenYieldPoint* yieldPoint;
// Associated generator object
PyGenObject* gen;
// JIT metadata for associated code object
CodeRuntime* code_rt{nullptr};
} GenDataFooter;
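// For illustration, a sketch (assuming `gen` is a PyGenObject* with JIT data
// attached) of recovering the base of the heap-allocated suspend data via the
// formula above:
//   auto* footer = reinterpret_cast<GenDataFooter*>(gen->gi_jit_data);
//   uint64_t* base = reinterpret_cast<uint64_t*>(footer) - footer->spillWords;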
// The state field needs to be at a fixed offset so it can be quickly accessed
// from C code.
static_assert(
offsetof(GenDataFooter, state) == _PY_GEN_JIT_DATA_STATE_OFFSET,
"Byte offset for state shifted");
// The number of words for pre-allocated blocks in the generator suspend-data
// free-list. I chose this value because it covered 99% of the JIT generator
// spill-sizes needed when running 'make testcinder_jit' at the time I
// collected this data. For reference:
//   99.9% coverage came at a spill size of 256
//   99.99% came at a spill size of 1552
//   the maximum observed was 4999
// There were roughly 15k JIT generators in total during the run.
const size_t kMinGenSpillWords = 89;
class GenYieldPoint {
public:
explicit GenYieldPoint(
std::vector<ptrdiff_t>&& pyobj_offs,
bool is_yield_from,
ptrdiff_t yield_from_offs)
: pyobj_offs_(std::move(pyobj_offs)),
isYieldFrom_(is_yield_from),
yieldFromOffs_(yield_from_offs) {}
void setResumeTarget(uint64_t resume_target) {
resume_target_ = resume_target;
}
uint64_t resumeTarget() const {
return resume_target_;
}
int visitRefs(PyGenObject* gen, visitproc visit, void* arg) const;
void releaseRefs(PyGenObject* gen) const;
PyObject* yieldFromValue(GenDataFooter* gen_footer) const;
static constexpr int resumeTargetOffset() {
return offsetof(GenYieldPoint, resume_target_);
}
private:
uint64_t resume_target_{0};
const std::vector<ptrdiff_t> pyobj_offs_;
const bool isYieldFrom_;
const ptrdiff_t yieldFromOffs_;
};
class RuntimeFrameState {
public:
RuntimeFrameState(BorrowedRef<PyCodeObject> code, BorrowedRef<> globals)
: code_(code), globals_(globals) {}
bool isGen() const {
return code()->co_flags & kCoFlagsAnyGenerator;
}
BorrowedRef<PyCodeObject> code() const {
return code_;
}
BorrowedRef<> globals() const {
return globals_;
}
static constexpr int64_t codeOffset() {
return offsetof(RuntimeFrameState, code_);
}
private:
// These are owned by the CodeRuntime that owns this RuntimeFrameState.
BorrowedRef<PyCodeObject> code_;
BorrowedRef<> globals_;
};
// Runtime data for a PyCodeObject object, containing caches and any other data
// associated with a JIT-compiled function.
class CodeRuntime {
public:
explicit CodeRuntime(
PyCodeObject* code,
PyObject* globals,
jit::hir::FrameMode frame_mode,
std::size_t num_lm_caches,
std::size_t num_la_caches,
std::size_t num_sa_caches,
std::size_t num_lat_caches)
: frame_state_(code, globals),
frame_mode_(frame_mode),
load_method_cache_pool_(num_lm_caches),
load_attr_cache_pool_(num_la_caches),
store_attr_cache_pool_(num_sa_caches),
load_type_attr_caches_(
std::make_unique<LoadTypeAttrCache[]>(num_lat_caches)) {
// TODO(T88040922): Until we work out something smarter, force code and
// globals objects for compiled functions to live as long as the JIT is
// initialized.
addReference(reinterpret_cast<PyObject*>(code));
addReference(globals);
}
template <typename... Args>
RuntimeFrameState* allocateRuntimeFrameState(Args&&... args) {
// Serialize as we modify the globally shared runtimes data.
ThreadedCompileSerialize guard;
inlined_frame_states_.emplace_back(
std::make_unique<RuntimeFrameState>(std::forward<Args>(args)...));
return inlined_frame_states_.back().get();
}
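// For illustration, a hedged sketch (`code_rt`, `inlined_code`, and
// `inlined_globals` are hypothetical): frame state for an inlined callee can
// be allocated here so it lives exactly as long as this CodeRuntime.
//   RuntimeFrameState* rfs =
//       code_rt->allocateRuntimeFrameState(inlined_code, inlined_globals);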
~CodeRuntime() {}
jit::hir::FrameMode frameMode() const {
return frame_mode_;
}
const RuntimeFrameState* frameState() const {
return &frame_state_;
}
// Release any references this CodeRuntime holds to Python objects.
void releaseReferences();
JITRT_LoadMethodCache* AllocateLoadMethodCache() {
return load_method_cache_pool_.AllocateEntry();
}
LoadAttrCache* AllocateLoadAttrCache() {
return load_attr_cache_pool_.allocate();
}
StoreAttrCache* allocateStoreAttrCache() {
return store_attr_cache_pool_.allocate();
}
LoadTypeAttrCache* getLoadTypeAttrCache(int id) {
return &load_type_attr_caches_[id];
}
// Ensure that this CodeRuntime owns a reference to the given object, keeping
// it alive for use by the compiled code.
void addReference(PyObject* obj);
// Store metadata about a generator yield point.
GenYieldPoint* addGenYieldPoint(GenYieldPoint&& gen_yield_point) {
gen_yield_points_.emplace_back(std::move(gen_yield_point));
return &gen_yield_points_.back();
}
void set_frame_size(int size) {
frame_size_ = size;
}
int frame_size() const {
return frame_size_;
}
// Add or lookup a mapping from a point in generated code to corresponding
// bytecode offset.
void addIPtoBCOff(uintptr_t ip, int bc_off);
// Returns the bytecode offset for the given address in generated code.
std::optional<int> getBCOffForIP(uintptr_t ip) const;
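// For example (a sketch; `ip` and `bc_off` are hypothetical): codegen records
// each mapping as it emits code,
//   code_rt->addIPtoBCOff(ip, bc_off);
// and a consumer can later recover the bytecode offset, if one was recorded:
//   std::optional<int> off = code_rt->getBCOffForIP(ip);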
static constexpr int64_t frameStateOffset() {
return offsetof(CodeRuntime, frame_state_);
}
static const int64_t kPyCodeOffset;
private:
RuntimeFrameState frame_state_;
std::vector<std::unique_ptr<RuntimeFrameState>> inlined_frame_states_;
jit::hir::FrameMode frame_mode_;
LoadMethodCachePool load_method_cache_pool_;
InlineCachePool<LoadAttrCache> load_attr_cache_pool_;
InlineCachePool<StoreAttrCache> store_attr_cache_pool_;
std::unique_ptr<LoadTypeAttrCache[]> load_type_attr_caches_;
std::unordered_set<Ref<PyObject>> references_;
// Metadata about yield points. Deque so we can have raw pointers to content.
std::deque<GenYieldPoint> gen_yield_points_;
int frame_size_{-1};
// Map from addresses in compiled code to bytecode offsets.
std::unordered_map<uintptr_t, int> ip_to_bc_off_;
};
// Information about the runtime behavior of a single deopt point: how often
// it's been hit, and the frequency of guilty types, if applicable.
struct DeoptStat {
std::size_t count;
FixedTypeProfiler<4> types;
};
// Map from DeoptMetadata index to stats about that deopt point.
using DeoptStats = std::unordered_map<std::size_t, DeoptStat>;
using BytecodeOffset = int;
// Profiling information for a PyCodeObject. Includes the total number of
// bytecodes executed and type profiles for certain opcodes, keyed by bytecode
// offset.
struct CodeProfile {
UnorderedMap<BytecodeOffset, std::unique_ptr<TypeProfiler>> typed_hits;
int64_t total_hits;
};
using TypeProfiles = std::unordered_map<Ref<PyCodeObject>, CodeProfile>;
// Runtime collects all of the data needed by the JIT at runtime. It maps
// each PyCodeObject to the runtime info that code object needs.
class Runtime {
public:
template <typename... Args>
CodeRuntime* allocateCodeRuntime(Args&&... args) {
// Serialize as we modify the globally shared runtimes data.
ThreadedCompileSerialize guard;
runtimes_.emplace_back(
std::make_unique<CodeRuntime>(std::forward<Args>(args)...));
return runtimes_.back().get();
}
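// For illustration, a hedged construction sketch (all argument values are
// hypothetical; see CodeRuntime's constructor for their meanings):
//   CodeRuntime* code_rt = runtime->allocateCodeRuntime(
//       code, globals, frame_mode, num_lm, num_la, num_sa, num_lat);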
// Create or look up a cache for the global with the given name, in the
// context of the given globals dict. This cache will fall back to
// builtins if the value isn't defined in this dict.
GlobalCache findGlobalCache(PyObject* globals, PyObject* name);
// Create or look up a cache for a member with the given name, in the
// context of the given dict. This cache will not fall back to builtins
// if the value isn't defined in the dict.
GlobalCache findDictCache(PyObject* globals, PyObject* name);
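// For example (a sketch; `func` and `name` are hypothetical), a cache for a
// global that falls back to builtins when the name is not in the dict:
//   GlobalCache cache = runtime->findGlobalCache(func->func_globals, name);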
// Find a cache for the indirect static entry point for a function.
void** findFunctionEntryCache(PyFunctionObject* function);
// Gets information about the primitive arguments that a function
// is typed to. Typed object references are explicitly excluded.
_PyTypedArgsInfo* findFunctionPrimitiveArgInfo(PyFunctionObject* function);
// Forget the given cache. Note that for now, this only removes bookkeeping
// for the cache; the cache itself is not freed and may still be reachable
// from compiled code.
void forgetLoadGlobalCache(GlobalCache cache);
// Add metadata used during deopt. Returns a handle that can be used to
// fetch the metadata from generated code.
std::size_t addDeoptMetadata(DeoptMetadata&& deopt_meta);
DeoptMetadata& getDeoptMetadata(std::size_t id);
// Record that a deopt of the given index happened at runtime, with an
// optional guilty value.
void recordDeopt(std::size_t idx, PyObject* guilty_value);
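// A sketch of the intended flow (`meta` and `guilty` are hypothetical): the
// compiler registers metadata and embeds the returned index in generated
// code, which passes it back if the deopt is ever taken:
//   std::size_t idx = runtime->addDeoptMetadata(std::move(meta));
//   ...
//   runtime->recordDeopt(idx, guilty);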
// Get and/or clear runtime deopt stats.
const DeoptStats& deoptStats() const;
void clearDeoptStats();
TypeProfiles& typeProfiles();
using GuardFailureCallback = std::function<void(const DeoptMetadata&)>;
// Add a function to be called when deoptimization occurs due to guard
// failure. Intended to be used for testing/debugging only.
void setGuardFailureCallback(GuardFailureCallback cb);
void guardFailed(const DeoptMetadata& deopt_meta);
void clearGuardFailureCallback();
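// For testing, a hedged sketch of installing and removing a callback:
//   runtime->setGuardFailureCallback(
//       [](const DeoptMetadata& meta) { /* inspect meta */ });
//   ...
//   runtime->clearGuardFailureCallback();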
// Ensure that this Runtime owns a reference to the given object, keeping
// it alive for use by compiled code.
void addReference(PyObject* obj);
// Release any references this Runtime holds to Python objects.
void releaseReferences();
template <typename T, typename... Args>
T* allocateDeoptPatcher(Args&&... args) {
deopt_patchers_.emplace_back(
std::make_unique<T>(std::forward<Args>(args)...));
return static_cast<T*>(deopt_patchers_.back().get());
}
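// For illustration (MyPatcher is a hypothetical DeoptPatcher subclass whose
// constructor takes a PyTypeObject*):
//   MyPatcher* patcher = runtime->allocateDeoptPatcher<MyPatcher>(type);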
private:
std::vector<std::unique_ptr<CodeRuntime>> runtimes_;
GlobalCacheMap global_caches_;
FunctionEntryCacheMap function_entry_caches_;
// Global caches removed by forgetLoadGlobalCache() may still be reachable
// from compiled code, and are kept alive here until runtime shutdown.
std::vector<GlobalCacheValue> orphaned_global_caches_;
std::vector<DeoptMetadata> deopt_metadata_;
DeoptStats deopt_stats_;
GuardFailureCallback guard_failure_callback_;
TypeProfiles type_profiles_;
// References to Python objects held by this Runtime
std::unordered_set<Ref<PyObject>> references_;
std::vector<std::unique_ptr<DeoptPatcher>> deopt_patchers_;
};
} // namespace jit