// Jit/jit_rt.cpp
// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#include "Jit/jit_rt.h"
#include "Objects/dict-common.h"
#include "Python.h"
#include "classloader.h"
#include "frameobject.h"
#include "listobject.h"
#include "object.h"
#include "pycore_shadow_frame.h"
#include "pystate.h"
#include "switchboard.h"
#include "Jit/codegen/gen_asm.h"
#include "Jit/frame.h"
#include "Jit/log.h"
#include "Jit/pyjit.h"
#include "Jit/ref.h"
#include "Jit/runtime.h"
#include "Jit/util.h"
// clang-format off
#include "internal/pycore_pyerrors.h"
#include "internal/pycore_pystate.h"
#include "internal/pycore_object.h"
#include "internal/pycore_tupleobject.h"
// clang-format on
// This is mostly taken from ceval.c _PyEval_EvalCodeWithName
// We use the same logic to turn **args, nargsf, and kwnames into
// **args / nargsf.
// One significant difference is we don't need to incref the args
// in the new array.
static int JITRT_BindKeywordArgs(
PyFunctionObject* func,
PyObject** args,
size_t nargsf,
PyObject* kwnames,
PyObject** arg_space,
Py_ssize_t total_args,
Ref<PyObject>& kwdict,
Ref<PyObject>& varargs) {
PyCodeObject* co = (PyCodeObject*)func->func_code;
Py_ssize_t argcount = PyVectorcall_NARGS(nargsf);
for (int i = 0; i < total_args; i++) {
arg_space[i] = NULL;
}
// Create a dictionary for keyword parameters (**kwargs)
if (co->co_flags & CO_VARKEYWORDS) {
kwdict = Ref<>::steal(PyDict_New());
if (kwdict == NULL) {
return 0;
}
arg_space[total_args - 1] = kwdict;
}
// Copy all positional arguments into local variables
Py_ssize_t n = std::min<Py_ssize_t>(argcount, co->co_argcount);
for (Py_ssize_t j = 0; j < n; j++) {
arg_space[j] = args[j];
}
// Pack other positional arguments into the *args argument
if (co->co_flags & CO_VARARGS) {
varargs = Ref<>::steal(_PyTuple_FromArray(args + n, argcount - n));
if (varargs == NULL) {
return 0;
}
Py_ssize_t i = total_args - 1;
if (co->co_flags & CO_VARKEYWORDS) {
i--;
}
arg_space[i] = varargs;
}
// Handle keyword arguments passed as two strided arrays
if (kwnames != NULL) {
for (Py_ssize_t i = 0; i < PyTuple_Size(kwnames); i++) {
PyObject** co_varnames;
PyObject* keyword = PyTuple_GET_ITEM(kwnames, i);
PyObject* value = args[argcount + i];
Py_ssize_t j;
if (keyword == NULL || !PyUnicode_Check(keyword)) {
return 0;
}
// Speed hack: do raw pointer compares. As names are
// normally interned this should almost always hit.
co_varnames = ((PyTupleObject*)(co->co_varnames))->ob_item;
for (j = co->co_posonlyargcount; j < total_args; j++) {
PyObject* name = co_varnames[j];
if (name == keyword) {
goto kw_found;
}
}
// Slow fallback, just in case
for (j = co->co_posonlyargcount; j < total_args; j++) {
PyObject* name = co_varnames[j];
int cmp = PyObject_RichCompareBool(keyword, name, Py_EQ);
if (cmp > 0) {
goto kw_found;
} else if (cmp < 0) {
return 0;
}
}
if (kwdict == NULL || PyDict_SetItem(kwdict, keyword, value) == -1) {
return 0;
}
continue;
kw_found:
if (arg_space[j] != NULL) {
return 0;
}
arg_space[j] = value;
}
}
// Check the number of positional arguments
if ((argcount > co->co_argcount) && !(co->co_flags & CO_VARARGS)) {
return 0;
}
// Add missing positional arguments (copy default values from defs)
if (argcount < co->co_argcount) {
Py_ssize_t defcount;
if (func->func_defaults != NULL) {
defcount = PyTuple_Size(func->func_defaults);
} else {
defcount = 0;
}
Py_ssize_t m = co->co_argcount - defcount;
Py_ssize_t missing = 0;
for (Py_ssize_t i = argcount; i < m; i++) {
if (arg_space[i] == NULL) {
missing++;
}
}
if (missing) {
return 0;
}
if (defcount) {
PyObject* const* defs =
&((PyTupleObject*)func->func_defaults)->ob_item[0];
for (Py_ssize_t i = std::max<Py_ssize_t>(n - m, 0); i < defcount; i++) {
if (arg_space[m + i] == NULL) {
PyObject* def = defs[i];
arg_space[m + i] = def;
}
}
}
}
// Add missing keyword arguments (copy default values from kwdefs)
if (co->co_kwonlyargcount > 0) {
Py_ssize_t missing = 0;
PyObject* kwdefs = func->func_kwdefaults;
for (Py_ssize_t i = co->co_argcount; i < total_args; i++) {
PyObject* name;
if (arg_space[i] != NULL)
continue;
name = PyTuple_GET_ITEM(co->co_varnames, i);
if (kwdefs != NULL) {
PyObject* def = PyDict_GetItemWithError(kwdefs, name);
if (def) {
arg_space[i] = def;
continue;
} else if (_PyErr_Occurred(_PyThreadState_GET())) {
return 0;
}
}
missing++;
}
if (missing) {
return 0;
}
}
return 1;
}
// This uses JITRT_BindKeywordArgs to get the newly bound keyword
// arguments. We then turn around and dispatch to the
// JITed function with the newly packed args.
// Rather than copying over all of the error reporting we instead
// just dispatch to the normal _PyFunction_Vectorcall if anything
// goes wrong, which is indicated by JITRT_BindKeywordArgs returning 0.
PyObject* JITRT_CallWithKeywordArgs(
PyFunctionObject* func,
PyObject** args,
size_t nargsf,
PyObject* kwnames) {
PyCodeObject* co = (PyCodeObject*)func->func_code;
const Py_ssize_t total_args = co->co_argcount + co->co_kwonlyargcount +
((co->co_flags & CO_VARKEYWORDS) ? 1 : 0) +
((co->co_flags & CO_VARARGS) ? 1 : 0);
PyObject* arg_space[total_args];
Ref<PyObject> kwdict, varargs;
if (JITRT_BindKeywordArgs(
func,
args,
nargsf,
kwnames,
arg_space,
total_args,
kwdict,
varargs)) {
return JITRT_GET_REENTRY(func->vectorcall)(
(PyObject*)func,
arg_space,
total_args | (nargsf & (_Py_AWAITED_CALL_MARKER)),
nullptr);
}
return _PyFunction_Vectorcall((PyObject*)func, args, nargsf, kwnames);
}
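// Function-pointer types matching the vectorcall signature of a JIT-compiled
// Static Python entry point. Unlike a regular vectorcallfunc these return a
// two-field struct (JITRT_StaticCallReturn / JITRT_StaticCallFPReturn), so a
// primitive result and a second error/flag value can come back in a register
// pair (see the comment above call_statically_with_primitive_signature_template
// below).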
typedef JITRT_StaticCallReturn (*staticvectorcallfunc)(
PyObject* callable,
PyObject* const* args,
size_t nargsf,
PyObject* kwnames);
typedef JITRT_StaticCallFPReturn (*staticvectorcallfuncfp)(
PyObject* callable,
PyObject* const* args,
size_t nargsf,
PyObject* kwnames);
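// Called when a JIT-compiled static function is invoked with fewer positional
// arguments than it expects. If the missing arguments can be filled from
// func_defaults, the call is re-dispatched to the JITed entry point with the
// completed argument array; otherwise we fall back to _PyFunction_Vectorcall
// purely so it can raise the appropriate error. This is the variant for
// functions returning a floating-point primitive; the object/integer variant
// follows below.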
JITRT_StaticCallFPReturn JITRT_CallWithIncorrectArgcountFPReturn(
PyFunctionObject* func,
PyObject** args,
size_t nargsf,
int argcount) {
PyObject* defaults = func->func_defaults;
if (defaults == nullptr) {
// Function has no defaults; there's nothing we can do.
_PyFunction_Vectorcall((PyObject*)func, args, nargsf, NULL);
return {0.0, 0.0};
}
Py_ssize_t defcount = PyTuple_GET_SIZE(defaults);
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
PyObject* arg_space[argcount];
Py_ssize_t defaulted_args = argcount - nargs;
if (nargs + defcount < argcount || nargs > argcount) {
// Not enough args with defaults, or too many args without defaults.
_PyFunction_Vectorcall((PyObject*)func, args, nargsf, NULL);
return {0.0, 0.0};
}
Py_ssize_t i;
for (i = 0; i < nargs; i++) {
arg_space[i] = *args++;
}
PyObject** def_items =
&((PyTupleObject*)defaults)->ob_item[defcount - defaulted_args];
for (; i < argcount; i++) {
arg_space[i] = *def_items++;
}
return reinterpret_cast<staticvectorcallfuncfp>(
JITRT_GET_REENTRY(func->vectorcall))(
(PyObject*)func,
arg_space,
argcount | (nargsf & (_Py_AWAITED_CALL_MARKER)),
// We lie to C++ here, and smuggle in the number of defaulted args filled
// in.
(PyObject*)defaulted_args);
}
JITRT_StaticCallReturn JITRT_CallWithIncorrectArgcount(
PyFunctionObject* func,
PyObject** args,
size_t nargsf,
int argcount) {
PyObject* defaults = func->func_defaults;
if (defaults == nullptr) {
// Function has no defaults; there's nothing we can do.
// Fallback to the default _PyFunction_Vectorcall implementation
// to produce an appropriate exception.
return {_PyFunction_Vectorcall((PyObject*)func, args, nargsf, NULL), NULL};
}
Py_ssize_t defcount = PyTuple_GET_SIZE(defaults);
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
PyObject* arg_space[argcount];
Py_ssize_t defaulted_args = argcount - nargs;
if (nargs + defcount < argcount || nargs > argcount) {
// Not enough args with defaults, or too many args without defaults.
return {_PyFunction_Vectorcall((PyObject*)func, args, nargsf, NULL), NULL};
}
Py_ssize_t i;
for (i = 0; i < nargs; i++) {
arg_space[i] = *args++;
}
PyObject** def_items =
&((PyTupleObject*)defaults)->ob_item[defcount - defaulted_args];
for (; i < argcount; i++) {
arg_space[i] = *def_items++;
}
return reinterpret_cast<staticvectorcallfunc>(
JITRT_GET_REENTRY(func->vectorcall))(
(PyObject*)func,
arg_space,
argcount | (nargsf & (_Py_AWAITED_CALL_MARKER)),
// We lie to C++ here, and smuggle in the number of defaulted args filled
// in.
(PyObject*)defaulted_args);
}
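// Packs the incoming boxed arguments into the layout expected by a
// statically-compiled entry point, unboxing primitive arguments (ints, bools,
// doubles, enums) as described by arg_info and typechecking the rest unless
// the call was already statically verified. Returns true if a typecheck or
// unbox fails, in which case the caller falls back to the interpreter to
// report the error.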
static bool pack_static_args(
PyObject** args,
_PyTypedArgsInfo* arg_info,
void** arg_space,
Py_ssize_t nargs,
size_t num_gp_regs_for_args,
bool invoked_statically) {
Py_ssize_t arg_index = 0;
// When filling in arg_space we need to put all the args which fit into
// machine registers first, followed by all args which need to spill into
// memory. This is complicated because there are two sets of registers
// (general purpose and floating-point), and we don't know how many of each
// type there will be in advance.
//
// To deal with this we add args to the arg_space considering whether they are
// floating-point or general-purpose. If there are enough of the correct type
// of registers available we add the args to arg_space going forward. If there
// aren't enough registers left we add them starting from the end of the
// arg_space. After all args have been added we then reverse the order of the
// memory-spilled args.
//
// TODO(jbower): It should be possible to remove the reverse operation with
// enough other changes in JIT code generation.
size_t gp_reg_index = 0;
size_t fp_reg_index = 0;
size_t arg_space_i_regs = 0;
size_t arg_space_i_mem = nargs - 1;
auto add_to_arg_space = [&](void* value, bool is_fp) {
if (is_fp) {
if (fp_reg_index == jit::codegen::FP_ARGUMENT_REG_COUNT) {
arg_space[arg_space_i_mem--] = value;
} else {
arg_space[arg_space_i_regs++] = value;
fp_reg_index++;
}
} else {
if (gp_reg_index == num_gp_regs_for_args) {
arg_space[arg_space_i_mem--] = value;
} else {
arg_space[arg_space_i_regs++] = value;
gp_reg_index++;
}
}
};
for (Py_ssize_t i = 0; i < nargs; i++) {
if (arg_index < Py_SIZE(arg_info) &&
arg_info->tai_args[arg_index].tai_argnum == i) {
_PyTypedArgInfo* cur_arg = &arg_info->tai_args[arg_index];
PyObject* arg = args[i];
if (cur_arg->tai_primitive_type == -1) {
if (!invoked_statically &&
!_PyObject_TypeCheckOptional(
arg,
cur_arg->tai_type,
cur_arg->tai_optional,
cur_arg->tai_exact)) {
return true;
}
add_to_arg_space(arg, false);
} else if (_PyClassLoader_IsEnum(cur_arg->tai_type)) {
int64_t ival;
if (invoked_statically) {
ival = JITRT_UnboxI64(arg);
} else if (_PyObject_TypeCheckOptional(
arg,
cur_arg->tai_type,
cur_arg->tai_optional,
cur_arg->tai_exact)) {
ival = JITRT_UnboxEnum(arg);
} else {
return true;
}
JIT_DCHECK(
ival != -1 || !PyErr_Occurred(),
"enums are statically guaranteed to have type int64");
add_to_arg_space(reinterpret_cast<void*>(ival), false);
} else if (cur_arg->tai_primitive_type == TYPED_BOOL) {
if (Py_TYPE(arg) != &PyBool_Type) {
return true;
}
add_to_arg_space(reinterpret_cast<void*>(arg == Py_True), false);
} else if (cur_arg->tai_primitive_type == TYPED_DOUBLE) {
if (Py_TYPE(arg) != &PyFloat_Type) {
return true;
}
add_to_arg_space(bit_cast<void*>(PyFloat_AsDouble(arg)), true);
} else if (cur_arg->tai_primitive_type <= TYPED_INT64) {
// Primitive arg check
size_t val;
if (Py_TYPE(arg) != &PyLong_Type ||
!_PyClassLoader_OverflowCheck(
arg, cur_arg->tai_primitive_type, (size_t*)&val)) {
return true;
}
add_to_arg_space(reinterpret_cast<void*>(val), false);
} else {
JIT_CHECK(
false,
"unsupported primitive type %d",
cur_arg->tai_primitive_type);
}
arg_index++;
continue;
}
add_to_arg_space(reinterpret_cast<void*>(args[i]), false);
}
// Reverse memory-spilled args as explained above
size_t i1 = arg_space_i_mem + 1;
size_t i2 = nargs - 1;
while (i1 < i2) {
std::swap(arg_space[i1++], arg_space[i2--]);
}
return false;
}
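// Packs and unboxes the arguments for a static call with a primitive
// signature and jumps to the JITed entry point. If packing fails we call
// _PyFunction_Vectorcall, which re-runs the argument checks and raises the
// appropriate error, and return a zeroed result.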
template <typename TRetType, typename TVectorcall>
static TRetType call_statically_with_primitive_signature_worker(
PyFunctionObject* func,
PyObject** args,
size_t nargsf,
_PyTypedArgsInfo* arg_info) {
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
void* arg_space[nargs];
bool invoked_statically = (nargsf & _Py_VECTORCALL_INVOKED_STATICALLY) != 0;
size_t num_gp_regs_for_args = jit::codegen::numGpRegsForArgs(
reinterpret_cast<PyCodeObject*>(func->func_code));
if (pack_static_args(
args,
arg_info,
arg_space,
nargs,
num_gp_regs_for_args,
invoked_statically)) {
goto fail;
}
return reinterpret_cast<TVectorcall>(JITRT_GET_REENTRY(func->vectorcall))(
(PyObject*)func, (PyObject**)arg_space, nargsf, NULL);
fail:
PyObject* res = _PyFunction_Vectorcall((PyObject*)func, args, nargsf, NULL);
JIT_DCHECK(res == NULL, "should always be reporting an error");
return TRetType();
}
// This can either be a static method returning a primitive or a Python object,
// so we use JITRT_StaticCallReturn. If it's returning a primitive we'll return
// rdx from the function, or return NULL for rdx when we dispatch to
// _PyFunction_Vectorcall for error generation. If it returns a Python object
// we'll return an additional garbage rdx from our caller, but our caller won't
// care about it either.
template <typename TRetType, typename TVectorcall>
static TRetType call_statically_with_primitive_signature_template(
PyFunctionObject* func,
PyObject** args,
size_t nargsf,
PyObject* kwnames,
_PyTypedArgsInfo* arg_info) {
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
PyCodeObject* co = (PyCodeObject*)func->func_code;
int invoked_statically = (nargsf & _Py_VECTORCALL_INVOKED_STATICALLY) != 0;
if (!invoked_statically &&
(kwnames || nargs != co->co_argcount ||
co->co_flags & (CO_VARARGS | CO_VARKEYWORDS))) {
// we need to fixup kwnames, defaults, etc...
PyCodeObject* co = (PyCodeObject*)func->func_code;
const Py_ssize_t total_args = co->co_argcount + co->co_kwonlyargcount +
((co->co_flags & CO_VARKEYWORDS) ? 1 : 0) +
((co->co_flags & CO_VARARGS) ? 1 : 0);
PyObject* arg_space[total_args];
Ref<PyObject> kwdict, varargs;
if (JITRT_BindKeywordArgs(
func,
args,
nargsf,
kwnames,
arg_space,
total_args,
kwdict,
varargs)) {
return call_statically_with_primitive_signature_worker<
TRetType,
TVectorcall>(
func, arg_space, total_args | PyVectorcall_FLAGS(nargsf), arg_info);
}
_PyFunction_Vectorcall((PyObject*)func, args, nargsf, kwnames);
return TRetType();
}
return call_statically_with_primitive_signature_worker<TRetType, TVectorcall>(
func, args, nargsf, arg_info);
}
JITRT_StaticCallReturn JITRT_CallStaticallyWithPrimitiveSignature(
PyFunctionObject* func,
PyObject** args,
size_t nargsf,
PyObject* kwnames,
_PyTypedArgsInfo* arg_info) {
return call_statically_with_primitive_signature_template<
JITRT_StaticCallReturn,
staticvectorcallfunc>(func, args, nargsf, kwnames, arg_info);
}
JITRT_StaticCallFPReturn JITRT_CallStaticallyWithPrimitiveSignatureFP(
PyFunctionObject* func,
PyObject** args,
size_t nargsf,
PyObject* kwnames,
_PyTypedArgsInfo* arg_info) {
return call_statically_with_primitive_signature_template<
JITRT_StaticCallFPReturn,
staticvectorcallfuncfp>(func, args, nargsf, kwnames, arg_info);
}
JITRT_StaticCallFPReturn JITRT_ReportStaticArgTypecheckErrorsWithDoubleReturn(
PyObject* func,
PyObject** args,
size_t nargsf,
PyObject* /* kwnames */) {
PyObject* res =
JITRT_ReportStaticArgTypecheckErrors(func, args, nargsf, NULL);
JIT_CHECK(res == NULL, "should always return an error");
return {0, 0};
}
JITRT_StaticCallReturn JITRT_ReportStaticArgTypecheckErrorsWithPrimitiveReturn(
PyObject* func,
PyObject** args,
size_t nargsf,
PyObject* /* kwnames */) {
PyObject* res =
JITRT_ReportStaticArgTypecheckErrors(func, args, nargsf, NULL);
JIT_CHECK(res == NULL, "should always return an error");
return {NULL, NULL};
}
PyObject* JITRT_ReportStaticArgTypecheckErrors(
PyObject* func,
PyObject** args,
size_t nargsf,
PyObject* /* kwnames */) {
auto code = reinterpret_cast<PyCodeObject*>(
reinterpret_cast<PyFunctionObject*>(func)->func_code);
int nkwonly = code == nullptr ? 0 : code->co_kwonlyargcount;
if (code == nullptr || nkwonly == 0) {
// We explicitly pass in nullptr for kwnames as the default arg count can
// be smuggled in to this function in place of kwnames.
return _PyFunction_Vectorcall(func, args, nargsf, nullptr);
}
// This function is called after we've successfully bound all
// arguments. However, we want to use the interpreter to construct the
// typecheck error. If the function takes any keyword-only arguments we must
// reconstruct kwnames so that the interpreted "prologue" in
// _PyEval_EvalCodeWithName can validate that the keyword-only arguments were
// passed as keywords.
Ref<> new_kwnames = Ref<>::steal(PyTuple_New(nkwonly));
if (new_kwnames == nullptr) {
return nullptr;
}
for (Py_ssize_t i = code->co_argcount; i < code->co_argcount + nkwonly; i++) {
Ref<> name(PyTuple_GetItem(code->co_varnames, i));
PyTuple_SetItem(new_kwnames, i - code->co_argcount, std::move(name));
}
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf) - nkwonly;
if (code->co_flags & CO_VARKEYWORDS) {
nargs -= 1;
}
Py_ssize_t flags = PyVectorcall_FLAGS(nargsf);
return _PyFunction_Vectorcall(func, args, nargs | flags, new_kwnames);
}
static PyFrameObject*
allocateFrame(PyThreadState* tstate, PyCodeObject* code, PyObject* globals) {
if (code->co_zombieframe != NULL) {
__builtin_prefetch(code->co_zombieframe);
}
/* TODO(T45035726) - This is doing more work than it needs to. Compiled code
* doesn't use the frame object at all. It's only there to ensure PyPerf works
* correctly, and PyPerf only needs access to the first argument.
*/
PyObject* builtins = PyEval_GetBuiltins();
if (builtins == NULL) {
return NULL;
}
Py_INCREF(builtins);
PyFrameObject* frame =
_PyFrame_NewWithBuiltins_NoTrack(tstate, code, globals, builtins, NULL);
if (frame == NULL) {
Py_DECREF(builtins);
return NULL;
}
return frame;
}
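// Allocates a PyFrameObject for `code`, marks it as executing, and links it
// as the top frame of the current thread. Returns the thread state (or
// nullptr on failure), presumably so the JITed prologue can reuse it without
// a second lookup.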
PyThreadState* JITRT_AllocateAndLinkFrame(
PyCodeObject* code,
PyObject* globals) {
PyThreadState* tstate = PyThreadState_GET();
JIT_DCHECK(tstate != NULL, "thread state cannot be null");
PyFrameObject* frame = allocateFrame(tstate, code, globals);
if (frame == nullptr) {
return nullptr;
}
/* Set the currently-executing flag on the frame */
frame->f_executing = 1;
tstate->frame = frame;
return tstate;
}
void JITRT_DecrefFrame(PyFrameObject* frame) {
if (Py_REFCNT(frame) > 1) {
// If the frame escaped it needs to be tracked
Py_DECREF(frame);
if (!_PyObject_GC_IS_TRACKED(frame)) {
PyObject_GC_Track(frame);
}
} else {
Py_DECREF(frame);
}
}
void JITRT_UnlinkFrame(PyThreadState* tstate) {
PyFrameObject* f = tstate->frame;
f->f_executing = 0;
tstate->frame = f->f_back;
JITRT_DecrefFrame(f);
}
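// LOAD_GLOBAL helper: look `name` up in globals and then builtins, raising
// NameError if it is found in neither. Returns a new reference.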
PyObject*
JITRT_LoadGlobal(PyObject* globals, PyObject* builtins, PyObject* name) {
PyObject* result =
_PyDict_LoadGlobal((PyDictObject*)globals, (PyDictObject*)builtins, name);
if ((result == NULL) && !_PyErr_OCCURRED()) {
PyErr_Format(PyExc_NameError, "name '%.200U' is not defined", name);
}
Py_XINCREF(result);
return result;
}
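// CALL_FUNCTION helper. The first entry of `args` is a scratch slot that the
// callee may overwrite (which is what PY_VECTORCALL_ARGUMENTS_OFFSET
// advertises); the real arguments start at args + 1, so we pass nargs - 1.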
template <bool is_awaited>
static inline PyObject*
call_function(PyObject* func, PyObject** args, Py_ssize_t nargs) {
size_t flags = PY_VECTORCALL_ARGUMENTS_OFFSET |
(is_awaited ? _Py_AWAITED_CALL_MARKER : 0);
return _PyObject_Vectorcall(func, args + 1, (nargs - 1) | flags, NULL);
}
PyObject*
JITRT_CallFunction(PyObject* func, PyObject** args, Py_ssize_t nargs) {
return call_function<false>(func, args, nargs);
}
PyObject*
JITRT_CallFunctionAwaited(PyObject* func, PyObject** args, Py_ssize_t nargs) {
return call_function<true>(func, args, nargs);
}
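// CALL_FUNCTION_KW helper: the last entry of `args` is the tuple of keyword
// names and the keyword values sit just before it, so we peel the tuple off,
// shrink nargs accordingly, and forward everything as a vectorcall.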
template <bool is_awaited>
static inline PyObject*
call_function_kwargs(PyObject* func, PyObject** args, Py_ssize_t nargs) {
PyObject* kwargs = args[nargs - 1];
JIT_DCHECK(PyTuple_CheckExact(kwargs), "Kwargs map must be a tuple");
nargs--;
Py_ssize_t nkwargs = PyTuple_GET_SIZE(kwargs);
JIT_DCHECK(nkwargs < nargs, "Kwargs map too large");
nargs -= nkwargs;
size_t flags = PY_VECTORCALL_ARGUMENTS_OFFSET |
(is_awaited ? _Py_AWAITED_CALL_MARKER : 0);
return _PyObject_Vectorcall(func, args + 1, (nargs - 1) | flags, kwargs);
}
PyObject*
JITRT_CallFunctionKWArgs(PyObject* func, PyObject** args, Py_ssize_t nargs) {
return call_function_kwargs<false>(func, args, nargs);
}
PyObject* JITRT_CallFunctionKWArgsAwaited(
PyObject* func,
PyObject** args,
Py_ssize_t nargs) {
return call_function_kwargs<true>(func, args, nargs);
}
template <bool is_awaited>
static inline PyObject*
call_function_ex(PyObject* func, PyObject* pargs, PyObject* kwargs) {
// Normalize p + kw args to tuple and dict types exactly.
Ref<> new_pargs;
// Logically, I don't think this incref of kwargs is needed but not having it
// breaks the C-version of functools.partial. The problem is a ref-count of 1
// on "kw" going into partial_new() triggers an optimization where the kwargs
// are not copied. This fails test_functoools.TestPartial*.test_kwargs_copy
// which asserts it's not possible to alter the kwargs after the call. A
// tempting alternative to this explicit ref management is to set up
// the memory effects of CallEx to steal the kwargs input. Unfortunately this
// breaks test_contextlib.ContextManagerTestCase.test_nokeepref by keeping
// kwargs and their contents alive for longer than expected.
Ref<> new_kwargs{kwargs};
if (kwargs) {
if (!PyDict_CheckExact(kwargs)) {
PyObject* d = PyDict_New();
if (d == NULL) {
return NULL;
}
if (PyDict_Update(d, kwargs) != 0) {
Py_DECREF(d);
if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
PyErr_Format(
PyExc_TypeError,
"%.200s%.200s argument after ** "
"must be a mapping, not %.200s",
PyEval_GetFuncName(func),
PyEval_GetFuncDesc(func),
kwargs->ob_type->tp_name);
}
return NULL;
}
kwargs = d;
new_kwargs = Ref<>::steal(kwargs);
}
JIT_DCHECK(PyDict_CheckExact(kwargs), "Expect kwargs to be a dict");
}
if (!PyTuple_CheckExact(pargs)) {
if (pargs->ob_type->tp_iter == NULL && !PySequence_Check(pargs)) {
PyErr_Format(
PyExc_TypeError,
"%.200s%.200s argument after * "
"must be an iterable, not %.200s",
PyEval_GetFuncName(func),
PyEval_GetFuncDesc(func),
pargs->ob_type->tp_name);
return NULL;
}
pargs = PySequence_Tuple(pargs);
if (pargs == NULL) {
return NULL;
}
new_pargs = Ref<>::steal(pargs);
}
JIT_DCHECK(PyTuple_CheckExact(pargs), "Expected pargs to be a tuple");
// Make function call using normalized args.
if (PyCFunction_Check(func)) {
// TODO(jbower): For completeness we should use a vector-call if possible to
// take into account is_awaited. My guess is there aren't going to be many C
// functions which handle _Py_AWAITED_CALL_MARKER.
return PyCFunction_Call(func, pargs, kwargs);
}
if (is_awaited && _PyVectorcall_Function(func) != NULL) {
return _PyVectorcall_Call(func, pargs, kwargs, _Py_AWAITED_CALL_MARKER);
}
return PyObject_Call(func, pargs, kwargs);
}
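// Loads a function through an indirection cell used by static calls. If the
// cell has been cleared (presumably because the target was patched), we
// re-resolve it from the descriptor via the class loader. The new reference
// from _PyClassLoader_ResolveFunction is dropped immediately, so the caller
// receives a borrowed reference either way (the function is assumed to be
// kept alive elsewhere).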
PyObject* JITRT_LoadFunctionIndirect(PyObject** func, PyObject* descr) {
PyObject* res = *func;
if (!res) {
res = _PyClassLoader_ResolveFunction(descr, NULL);
Py_XDECREF(res);
}
return res;
}
PyObject*
JITRT_CallFunctionEx(PyObject* func, PyObject* pargs, PyObject* kwargs) {
return call_function_ex<false>(func, pargs, kwargs);
}
PyObject*
JITRT_CallFunctionExAwaited(PyObject* func, PyObject* pargs, PyObject* kwargs) {
return call_function_ex<true>(func, pargs, kwargs);
}
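// INVOKE_FUNCTION helper: identical to call_function except the call is
// marked _Py_VECTORCALL_INVOKED_STATICALLY so a Static Python callee can skip
// its argument typechecks.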
template <bool is_awaited>
static inline PyObject*
invoke_function(PyObject* func, PyObject** args, Py_ssize_t nargs) {
size_t flags = _Py_VECTORCALL_INVOKED_STATICALLY |
PY_VECTORCALL_ARGUMENTS_OFFSET |
(is_awaited ? _Py_AWAITED_CALL_MARKER : 0);
return _PyObject_Vectorcall(func, args + 1, (nargs - 1) | flags, NULL);
}
PyObject*
JITRT_InvokeFunction(PyObject* func, PyObject** args, Py_ssize_t nargs) {
return invoke_function<false>(func, args, nargs);
}
PyObject*
JITRT_InvokeFunctionAwaited(PyObject* func, PyObject** args, Py_ssize_t nargs) {
return invoke_function<true>(func, args, nargs);
}
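// CALL_METHOD helper: dispatches based on the call kind classified by
// JITRT_GetMethod (or JITRT_GetMethodFromSuper), calling the underlying
// function, method descriptor, or wrapper descriptor directly with
// _Py_VECTORCALL_INVOKED_METHOD rather than materializing a bound method.
// The default case handles everything else with a plain vectorcall.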
template <bool is_awaited>
static inline PyObject* call_method(
PyObject* callable,
PyObject** args,
Py_ssize_t nargs,
PyObject* kwnames,
JITRT_CallMethodKind call_kind) {
size_t is_awaited_flag = is_awaited ? _Py_AWAITED_CALL_MARKER : 0;
switch (call_kind) {
case JITRT_CALL_KIND_FUNC: {
PyFunctionObject* func = (PyFunctionObject*)callable;
return func->vectorcall(
callable,
args,
nargs | _Py_VECTORCALL_INVOKED_METHOD | is_awaited_flag,
kwnames);
}
case JITRT_CALL_KIND_METHOD_DESCR: {
PyMethodDescrObject* func = (PyMethodDescrObject*)callable;
return func->vectorcall(
callable,
args,
nargs | _Py_VECTORCALL_INVOKED_METHOD | is_awaited_flag,
kwnames);
}
case JITRT_CALL_KIND_METHOD_LIKE: {
return _PyObject_Vectorcall(
callable,
args,
nargs | _Py_VECTORCALL_INVOKED_METHOD | is_awaited_flag,
kwnames);
}
case JITRT_CALL_KIND_WRAPPER_DESCR: {
PyWrapperDescrObject* func = (PyWrapperDescrObject*)callable;
return func->d_vectorcall(
callable,
args,
nargs | _Py_VECTORCALL_INVOKED_METHOD | is_awaited_flag,
kwnames);
}
default: {
// Slow path, should rarely get here
JIT_DCHECK(kwnames == nullptr, "kwnames not supported yet");
return _PyObject_Vectorcall(
callable,
args + 1,
(nargs - 1) | PY_VECTORCALL_ARGUMENTS_OFFSET | is_awaited_flag,
kwnames);
}
}
}
PyObject* JITRT_CallMethod(
PyObject* callable,
PyObject** args,
Py_ssize_t nargs,
PyObject* kwnames,
JITRT_CallMethodKind call_kind) {
return call_method<false>(callable, args, nargs, kwnames, call_kind);
}
PyObject* JITRT_CallMethodAwaited(
PyObject* callable,
PyObject** args,
Py_ssize_t nargs,
PyObject* kwnames,
JITRT_CallMethodKind call_kind) {
return call_method<true>(callable, args, nargs, kwnames, call_kind);
}
void JITRT_Dealloc(PyObject* obj) {
_Py_Dealloc(obj);
}
PyObject* JITRT_UnaryNot(PyObject* value) {
int res = PyObject_IsTrue(value);
if (res == 0) {
Py_INCREF(Py_True);
return Py_True;
} else if (res > 0) {
Py_INCREF(Py_False);
return Py_False;
}
return NULL;
}
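// Switchboard callback invoked when a type recorded in a JITRT_LoadMethodCache
// is modified (or destroyed, in which case the weakref resolves to None).
// Clears the affected cache entries and then unsubscribes itself.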
static void invalidate_load_method_cache(
PyObject* handle,
PyObject* capsule,
PyObject* modified_type_weakref) {
JITRT_LoadMethodCache* cache =
static_cast<JITRT_LoadMethodCache*>(PyCapsule_GetPointer(capsule, NULL));
PyObject* modified_type = PyWeakref_GetObject(modified_type_weakref);
for (int i = 0; i < LOAD_METHOD_CACHE_SIZE; i++) {
// If the type that was referenced went away, we clear all the cache
// entries as we cannot be sure which ones are invalid.
//
// Otherwise, only clear the matching entry.
if ((modified_type == Py_None) ||
(((PyTypeObject*)modified_type) == cache->entries[i].type)) {
cache->entries[i].type = NULL;
cache->entries[i].value = NULL;
cache->entries[i].call_kind = JITRT_CALL_KIND_OTHER;
}
}
Switchboard_Unsubscribe((Switchboard*)_PyType_GetSwitchboard(), handle);
}
static void fill_method_cache(
JITRT_LoadMethodCache* cache,
PyObject* /* obj */,
PyTypeObject* type,
PyObject* value,
JITRT_CallMethodKind call_kind) {
JITRT_LoadMethodCacheEntry* to_fill = NULL;
if (!PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) {
// The type must have a valid version tag in order for us to be able to
// invalidate the cache when the type is modified. See the comment at
// the top of `PyType_Modified` for more details.
return;
}
if (!PyType_HasFeature(type, Py_TPFLAGS_NO_SHADOWING_INSTANCES) &&
(type->tp_dictoffset != 0)) {
return;
}
for (int i = 0; i < LOAD_METHOD_CACHE_SIZE; i++) {
if (cache->entries[i].type == NULL) {
to_fill = &(cache->entries[i]);
break;
}
}
if (to_fill == NULL) {
return;
}
PyObject* capsule = PyCapsule_New(cache, NULL, NULL);
if (capsule == NULL) {
return;
}
Switchboard* sb = (Switchboard*)_PyType_GetSwitchboard();
PyObject* handle = Switchboard_Subscribe(
sb, (PyObject*)type, invalidate_load_method_cache, capsule);
Py_XDECREF(handle);
Py_DECREF(capsule);
if (handle == NULL) {
return;
}
to_fill->type = type;
to_fill->value = value;
to_fill->call_kind = call_kind;
}
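// Slow path for JITRT_GetMethod: performs a generic attribute lookup much like
// LOAD_METHOD in the interpreter, classifies the result so CALL_METHOD can
// avoid creating a bound method, and, for cacheable cases, records the result
// in the per-call-site cache keyed by the receiver's type.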
static PyObject* __attribute__((noinline)) get_method_slow_path(
PyObject* obj,
PyObject* name,
JITRT_LoadMethodCache* cache,
JITRT_CallMethodKind* call_kind) {
PyTypeObject* tp = Py_TYPE(obj);
PyObject* descr;
descrgetfunc f = NULL;
PyObject **dictptr, *dict;
PyObject* attr;
JITRT_CallMethodKind found_kind = JITRT_CALL_KIND_OTHER;
if ((tp->tp_getattro != PyObject_GenericGetAttr)) {
*call_kind = JITRT_CALL_KIND_OTHER;
return PyObject_GetAttr(obj, name);
} else if (tp->tp_dict == NULL && PyType_Ready(tp) < 0) {
return NULL;
}
descr = _PyType_Lookup(tp, name);
if (descr != NULL) {
Py_INCREF(descr);
if (PyFunction_Check(descr)) {
found_kind = JITRT_CALL_KIND_FUNC;
} else if (Py_TYPE(descr) == &PyMethodDescr_Type) {
found_kind = JITRT_CALL_KIND_METHOD_DESCR;
} else if (PyType_HasFeature(
Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR)) {
found_kind = JITRT_CALL_KIND_METHOD_LIKE;
} else {
f = descr->ob_type->tp_descr_get;
if (f != NULL && PyDescr_IsData(descr)) {
PyObject* result = f(descr, obj, (PyObject*)obj->ob_type);
Py_DECREF(descr);
*call_kind = JITRT_CALL_KIND_OTHER;
return result;
}
}
}
dictptr = _PyObject_GetDictPtr(obj);
if (dictptr != NULL && (dict = *dictptr) != NULL) {
Py_INCREF(dict);
attr = PyDict_GetItem(dict, name);
if (attr != NULL) {
Py_INCREF(attr);
Py_DECREF(dict);
Py_XDECREF(descr);
*call_kind = JITRT_CALL_KIND_OTHER;
return attr;
}
Py_DECREF(dict);
}
if (found_kind == JITRT_CALL_KIND_FUNC ||
found_kind == JITRT_CALL_KIND_METHOD_DESCR ||
found_kind == JITRT_CALL_KIND_METHOD_LIKE) {
*call_kind = found_kind;
fill_method_cache(cache, obj, tp, descr, found_kind);
return descr;
}
if (f != NULL) {
PyObject* result = f(descr, obj, (PyObject*)Py_TYPE(obj));
Py_DECREF(descr);
*call_kind = JITRT_CALL_KIND_OTHER;
return result;
}
if (descr != NULL) {
*call_kind = JITRT_CALL_KIND_OTHER;
return descr;
}
PyErr_Format(
PyExc_AttributeError,
"'%.50s' object has no attribute '%U'",
tp->tp_name,
name);
return NULL;
}
PyObject* __attribute__((hot)) JITRT_GetMethod(
PyObject* obj,
PyObject* name,
JITRT_LoadMethodCache* cache,
JITRT_CallMethodKind* call_kind) {
PyTypeObject* tp = Py_TYPE(obj);
for (int i = 0; i < LOAD_METHOD_CACHE_SIZE; i++) {
if (cache->entries[i].type == tp) {
PyObject* result = cache->entries[i].value;
Py_INCREF(result);
*call_kind = cache->entries[i].call_kind;
return result;
}
}
return get_method_slow_path(obj, name, cache, call_kind);
}
PyObject* JITRT_GetMethodFromSuper(
PyObject* global_super,
PyObject* type,
PyObject* self,
PyObject* name,
bool no_args_in_super_call,
JITRT_CallMethodKind* call_kind) {
int meth_found = 0;
PyObject* result = _PyEval_SuperLookupMethodOrAttr(
PyThreadState_GET(),
global_super,
(PyTypeObject*)type,
self,
name,
no_args_in_super_call,
&meth_found);
if (result == NULL) {
return NULL;
}
if (meth_found) {
if (PyFunction_Check(result)) {
*call_kind = JITRT_CALL_KIND_FUNC;
} else if (Py_TYPE(result) == &PyMethodDescr_Type) {
*call_kind = JITRT_CALL_KIND_METHOD_DESCR;
} else if (Py_TYPE(result) == &PyWrapperDescr_Type) {
*call_kind = JITRT_CALL_KIND_WRAPPER_DESCR;
} else if (PyType_HasFeature(
Py_TYPE(result), Py_TPFLAGS_METHOD_DESCRIPTOR)) {
*call_kind = JITRT_CALL_KIND_METHOD_LIKE;
} else {
*call_kind = JITRT_CALL_KIND_OTHER;
}
} else {
*call_kind = JITRT_CALL_KIND_OTHER;
}
return result;
}
PyObject* JITRT_GetAttrFromSuper(
PyObject* global_super,
PyObject* type,
PyObject* self,
PyObject* name,
bool no_args_in_super_call) {
return _PyEval_SuperLookupMethodOrAttr(
PyThreadState_GET(),
global_super,
(PyTypeObject*)type,
self,
name,
no_args_in_super_call,
NULL);
}
void JITRT_InitLoadMethodCache(JITRT_LoadMethodCache* cache) {
memset(cache, 0, sizeof(*cache));
}
PyObject* JITRT_InvokeMethod(
Py_ssize_t slot,
PyObject** args,
Py_ssize_t nargs,
PyObject* kwnames) {
PyTypeObject* self_type = Py_TYPE(args[0]);
_PyType_VTable* vtable = (_PyType_VTable*)self_type->tp_cache;
PyObject* func = vtable->vt_entries[slot].vte_state;
return vtable->vt_entries[slot].vte_entry(
func, args, nargs | _Py_VECTORCALL_INVOKED_STATICALLY, kwnames);
}
PyObject* JITRT_InvokeClassMethod(
Py_ssize_t slot,
PyObject** args,
Py_ssize_t nargs,
PyObject* kwnames) {
PyTypeObject* self_type = (PyTypeObject*)args[0];
_PyType_VTable* vtable = (_PyType_VTable*)self_type->tp_cache;
PyObject* func = vtable->vt_entries[slot].vte_state;
return vtable->vt_entries[slot].vte_entry(
func,
args,
nargs | _Py_VECTORCALL_INVOKED_STATICALLY |
_Py_VECTORCALL_INVOKED_CLASSMETHOD,
kwnames);
}
PyObject* JITRT_TypeCheck(PyObject* obj, PyTypeObject* type) {
if (PyObject_TypeCheck(obj, type)) {
return Py_True;
}
return Py_False;
}
PyObject* JITRT_TypeCheckExact(PyObject* obj, PyTypeObject* type) {
if (_PyObject_TypeCheckOptional(obj, type, /* opt */ 1, /* exact */ 1)) {
return Py_True;
}
return Py_False;
}
PyObject* JITRT_TypeCheckFloat(PyObject* obj) {
if (PyObject_TypeCheck(obj, &PyFloat_Type) ||
PyObject_TypeCheck(obj, &PyLong_Type)) {
return Py_True;
}
return Py_False;
}
PyObject* JITRT_TypeCheckOptional(PyObject* obj, PyTypeObject* type) {
if (_PyObject_TypeCheckOptional(obj, type, /* opt */ 1, /* exact */ 0)) {
return Py_True;
}
return Py_False;
}
PyObject* JITRT_TypeCheckOptionalExact(PyObject* obj, PyTypeObject* type) {
if (_PyObject_TypeCheckOptional(obj, type, /* opt */ 1, /* exact */ 1)) {
return Py_True;
}
return Py_False;
}
PyObject* JITRT_TypeCheckFloatOptional(PyObject* obj) {
if (obj == Py_None || PyObject_TypeCheck(obj, &PyFloat_Type) ||
PyObject_TypeCheck(obj, &PyLong_Type)) {
return Py_True;
}
return Py_False;
}
/* This function is inlined to LIR via kCHelpersManual, so changes here will
* have no effect. */
PyObject* JITRT_Cast(PyObject* obj, PyTypeObject* type) {
if (PyObject_TypeCheck(obj, type)) {
return obj;
}
PyErr_Format(
PyExc_TypeError,
"expected '%s', got '%s'",
type->tp_name,
Py_TYPE(obj)->tp_name);
return NULL;
}
PyObject* JITRT_CastOptional(PyObject* obj, PyTypeObject* type) {
if (_PyObject_TypeCheckOptional(obj, type, /* opt */ 1, /* exact */ 0)) {
return obj;
}
PyErr_Format(
PyExc_TypeError,
"expected '%s', got '%s'",
type->tp_name,
Py_TYPE(obj)->tp_name);
return NULL;
}
PyObject* JITRT_CastExact(PyObject* obj, PyTypeObject* type) {
if (_PyObject_TypeCheckOptional(obj, type, /* opt */ 0, /* exact */ 1)) {
return obj;
}
PyErr_Format(
PyExc_TypeError,
"expected exactly '%s', got '%s'",
type->tp_name,
Py_TYPE(obj)->tp_name);
return NULL;
}
PyObject* JITRT_CastOptionalExact(PyObject* obj, PyTypeObject* type) {
if (_PyObject_TypeCheckOptional(obj, type, /* opt */ 1, /* exact */ 1)) {
return obj;
}
PyErr_Format(
PyExc_TypeError,
"expected exactly '%s', got '%s'",
type->tp_name,
Py_TYPE(obj)->tp_name);
return NULL;
}
/* Needed because cast to float does extra work that would be a pain to add to
* the manual inlined LIR for JITRT_Cast. */
PyObject* JITRT_CastToFloat(PyObject* obj) {
if (PyObject_TypeCheck(obj, &PyFloat_Type)) {
// cast to float is not considered pass-through by refcount insertion (since
// it may produce a new reference), so even if in fact it is pass-through
// (because we got a float), we need to return a new reference.
Py_INCREF(obj);
return obj;
} else if (PyObject_TypeCheck(obj, &PyLong_Type)) {
// special case because Python typing pretends int subtypes float
return PyFloat_FromDouble(PyLong_AsLong(obj));
}
PyErr_Format(
PyExc_TypeError, "expected 'float', got '%s'", Py_TYPE(obj)->tp_name);
return NULL;
}
PyObject* JITRT_CastToFloatOptional(PyObject* obj) {
if (_PyObject_TypeCheckOptional(
obj, &PyFloat_Type, /* opt */ 1, /* exact */ 0)) {
// cast to float is not considered pass-through by refcount insertion (since
// it may produce a new reference), so even if in fact it is pass-through
// (because we got a float), we need to return a new reference.
Py_INCREF(obj);
return obj;
} else if (PyObject_TypeCheck(obj, &PyLong_Type)) {
// special case because Python typing pretends int subtypes float
return PyFloat_FromDouble(PyLong_AsLong(obj));
}
PyErr_Format(
PyExc_TypeError, "expected 'float', got '%s'", Py_TYPE(obj)->tp_name);
return NULL;
}
int64_t JITRT_ShiftLeft64(int64_t x, int64_t y) {
return x << y;
}
int32_t JITRT_ShiftLeft32(int32_t x, int32_t y) {
return x << y;
}
int64_t JITRT_ShiftRight64(int64_t x, int64_t y) {
return x >> y;
}
int32_t JITRT_ShiftRight32(int32_t x, int32_t y) {
return x >> y;
}
uint64_t JITRT_ShiftRightUnsigned64(uint64_t x, uint64_t y) {
return x >> y;
}
uint32_t JITRT_ShiftRightUnsigned32(uint32_t x, uint32_t y) {
return x >> y;
}
int64_t JITRT_Mod64(int64_t x, int64_t y) {
return x % y;
}
int32_t JITRT_Mod32(int32_t x, int32_t y) {
return x % y;
}
uint64_t JITRT_ModUnsigned64(uint64_t x, uint64_t y) {
return x % y;
}
uint32_t JITRT_ModUnsigned32(uint32_t x, uint32_t y) {
return x % y;
}
PyObject* JITRT_BoxI32(int32_t i) {
return PyLong_FromLong(i);
}
PyObject* JITRT_BoxU32(uint32_t i) {
return PyLong_FromUnsignedLong(i);
}
PyObject* JITRT_BoxBool(uint32_t i) {
if (i) {
Py_RETURN_TRUE;
}
Py_RETURN_FALSE;
}
PyObject* JITRT_BoxI64(int64_t i) {
return PyLong_FromSsize_t(i);
}
PyObject* JITRT_BoxU64(uint64_t i) {
return PyLong_FromSize_t(i);
}
PyObject* JITRT_BoxDouble(double_t d) {
return PyFloat_FromDouble(d);
}
PyObject* JITRT_BoxEnum(int64_t i, uint64_t t) {
PyObject* val = PyLong_FromSsize_t(i);
PyObject* ret = _PyObject_Call1Arg((PyObject*)t, val);
Py_DECREF(val);
return ret;
}
uint64_t JITRT_IsNegativeAndErrOccurred_64(int64_t i) {
return (i == -1 && _PyErr_OCCURRED()) ? -1 : 0;
}
uint64_t JITRT_IsNegativeAndErrOccurred_32(int32_t i) {
return (i == -1 && _PyErr_OCCURRED()) ? -1 : 0;
}
double JITRT_PowerDouble(double x, double y) {
return pow(x, y);
}
double JITRT_Power32(int32_t x, int32_t y) {
return pow(x, y);
}
double JITRT_PowerUnsigned32(uint32_t x, uint32_t y) {
return pow(x, y);
}
double JITRT_Power64(int64_t x, int64_t y) {
return pow(x, y);
}
double JITRT_PowerUnsigned64(uint64_t x, uint64_t y) {
return pow(x, y);
}
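// Typed-array load helpers: `arr` points at the object, `offset` is the byte
// offset of the element storage, and the loaded element is widened to 64 bits
// (sign-extended for the signed variants).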
uint64_t JITRT_GetI8_FromArray(char* arr, int64_t idx, ssize_t offset) {
long result = (arr + offset)[idx];
if (result >= 128)
result -= 256;
return result;
}
uint64_t JITRT_GetU8_FromArray(char* arr, int64_t idx, ssize_t offset) {
long result = ((unsigned char*)(arr + offset))[idx];
return result;
}
uint64_t JITRT_GetI16_FromArray(char* arr, int64_t idx, ssize_t offset) {
return (long)((short*)(arr + offset))[idx];
}
uint64_t JITRT_GetU16_FromArray(char* arr, int64_t idx, ssize_t offset) {
return (long)((unsigned short*)(arr + offset))[idx];
}
uint64_t JITRT_GetI32_FromArray(char* arr, int64_t idx, ssize_t offset) {
return ((long*)(arr + offset))[idx];
}
uint64_t JITRT_GetU32_FromArray(char* arr, int64_t idx, ssize_t offset) {
return ((unsigned long*)(arr + offset))[idx];
}
uint64_t JITRT_GetI64_FromArray(char* arr, int64_t idx, ssize_t offset) {
return ((long long*)(arr + offset))[idx];
}
uint64_t JITRT_GetU64_FromArray(char* arr, int64_t idx, ssize_t offset) {
return ((unsigned long long*)(arr + offset))[idx];
}
PyObject* JITRT_GetObj_FromArray(char* arr, int64_t idx, ssize_t offset) {
return ((PyObject**)(arr + offset))[idx];
}
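// Typed-array store helpers: unlike the loads, these take a pointer directly
// to the element storage and truncate the incoming 64-bit value to the
// element width.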
void JITRT_SetI8_InArray(char* arr, uint64_t val, int64_t idx) {
arr[idx] = (char)val;
}
void JITRT_SetU8_InArray(char* arr, uint64_t val, int64_t idx) {
arr[idx] = (unsigned char)val;
}
void JITRT_SetI16_InArray(char* arr, uint64_t val, int64_t idx) {
((short*)arr)[idx] = (short)val;
}
void JITRT_SetU16_InArray(char* arr, uint64_t val, int64_t idx) {
((unsigned short*)arr)[idx] = (unsigned short)val;
}
void JITRT_SetI32_InArray(char* arr, uint64_t val, int64_t idx) {
((int*)arr)[idx] = (int)val;
}
void JITRT_SetU32_InArray(char* arr, uint64_t val, int64_t idx) {
((unsigned int*)arr)[idx] = (unsigned int)val;
}
void JITRT_SetI64_InArray(char* arr, uint64_t val, int64_t idx) {
((long*)arr)[idx] = (long)val;
}
void JITRT_SetU64_InArray(char* arr, uint64_t val, int64_t idx) {
((unsigned long*)arr)[idx] = (unsigned long)val;
}
void JITRT_SetObj_InArray(char* arr, uint64_t val, int64_t idx) {
((PyObject**)arr)[idx] = (PyObject*)val;
}
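// Unboxes a Python int into a fixed-width integer type T, setting
// OverflowError if the value does not fit. A result of -1 (all bits set for
// unsigned T) with an exception set signals the error to the caller.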
template <typename T>
static T checkedUnboxImpl(PyObject* obj) {
constexpr bool is_signed = std::is_signed_v<T>;
std::conditional_t<is_signed, int64_t, uint64_t> res;
if constexpr (is_signed) {
res = PyLong_AsSsize_t(obj);
} else {
res = PyLong_AsSize_t(obj);
}
if (T(res) == res || (!is_signed && res == T(-1) && _PyErr_OCCURRED())) {
return res;
}
PyErr_SetString(PyExc_OverflowError, "int overflow");
return -1;
}
uint64_t JITRT_UnboxU64(PyObject* obj) {
return PyLong_AsSize_t(obj);
}
uint32_t JITRT_UnboxU32(PyObject* obj) {
return checkedUnboxImpl<uint32_t>(obj);
}
uint16_t JITRT_UnboxU16(PyObject* obj) {
return checkedUnboxImpl<uint16_t>(obj);
}
uint8_t JITRT_UnboxU8(PyObject* obj) {
return checkedUnboxImpl<uint8_t>(obj);
}
int64_t JITRT_UnboxI64(PyObject* obj) {
return PyLong_AsSsize_t(obj);
}
int32_t JITRT_UnboxI32(PyObject* obj) {
return checkedUnboxImpl<int32_t>(obj);
}
int16_t JITRT_UnboxI16(PyObject* obj) {
return checkedUnboxImpl<int16_t>(obj);
}
int8_t JITRT_UnboxI8(PyObject* obj) {
return checkedUnboxImpl<int8_t>(obj);
}
int64_t JITRT_UnboxEnum(PyObject* obj) {
PyObject* value = PyObject_GetAttrString(obj, "value");
if (value == NULL) {
return -1;
}
Py_ssize_t ret = PyLong_AsSsize_t(value);
Py_DECREF(value);
return ret;
}
PyObject* JITRT_ImportName(
PyThreadState* tstate,
PyObject* name,
PyObject* fromlist,
PyObject* level) {
_Py_IDENTIFIER(__import__);
PyObject *import_func, *res;
PyObject* stack[5];
PyObject* globals = PyEval_GetGlobals();
PyObject* builtins = tstate->interp->builtins;
import_func = _PyDict_GetItemId(builtins, &PyId___import__);
if (import_func == NULL) {
PyErr_SetString(PyExc_ImportError, "__import__ not found");
return NULL;
}
/* Fast path for not overloaded __import__. */
if (import_func == tstate->interp->import_func) {
int ilevel = _PyLong_AsInt(level);
if (ilevel == -1 && _PyErr_Occurred(tstate)) {
return NULL;
}
res = PyImport_ImportModuleLevelObject(
name,
globals,
// Locals are not actually used by the builtin import.
// This is documented behavior as of Python 3.7.
Py_None,
fromlist,
ilevel);
return res;
}
Py_INCREF(import_func);
stack[0] = name;
stack[1] = globals;
// In this implementation we always pass None for locals as it's easier than
// fully materializing them now. The CPython interpreter has strange
// (probably broken) behavior - it will only pass a dictionary of locals to
// __builtins__.__import__() if the locals have been materialized already,
// for example by a call to locals(). Reliance on this behavior is unlikely.
stack[2] = Py_None;
stack[3] = fromlist;
stack[4] = level;
res = _PyObject_FastCall(import_func, stack, 5);
Py_DECREF(import_func);
return res;
}
void JITRT_DoRaise(PyThreadState* tstate, PyObject* exc, PyObject* cause) {
// If we re-raise with no error set, deliberately do nothing and let
// prepareForDeopt() handle this. We can't let _Py_DoRaise() handle this by
// raising a RuntimeError as this would mean prepareForDeopt() does not call
// PyTraceBack_Here().
if (exc == NULL) {
auto* exc_info = _PyErr_GetTopmostException(tstate);
auto type = exc_info->exc_type;
if (type == Py_None || type == NULL) {
return;
}
}
// We deliberately discard the return value here. In the interpreter a return
// value of 1 indicates a _valid_ re-raise which skips:
// (1) Calling PyTraceBack_Here().
// (2) Raising a SystemError if no exception is set (no need, _Py_DoRaise
// already handles this).
// (3) Calling tstate->c_tracefunc.
// We don't support (3) and handle (1) + (2) between the check above and in
// prepareForDeopt().
_Py_DoRaise(tstate, exc, cause);
}
// JIT generator data free-list globals
const size_t kGenDataFreeListMaxSize = 1024;
static size_t gen_data_free_list_size = 0;
static void* gen_data_free_list_tail;
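// Allocates the spill area for a suspended generator plus the GenDataFooter
// that describes it. Allocations of the minimum spill size are recycled
// through a small intrusive free list (the first word of each free entry
// points at the next one); larger allocations always go through malloc.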
static void* gen_data_allocate(size_t spill_words) {
if (spill_words > jit::kMinGenSpillWords || !gen_data_free_list_size) {
auto data =
malloc(spill_words * sizeof(uint64_t) + sizeof(jit::GenDataFooter));
auto footer = reinterpret_cast<jit::GenDataFooter*>(
reinterpret_cast<uint64_t*>(data) + spill_words);
footer->spillWords = spill_words;
return data;
}
// All free list entries have spill-word size jit::kMinGenSpillWords, so we
// don't need to set footer->spillWords again; it should still be set from a
// previous use.
JIT_DCHECK(spill_words == jit::kMinGenSpillWords, "invalid size");
gen_data_free_list_size--;
auto res = gen_data_free_list_tail;
gen_data_free_list_tail = *reinterpret_cast<void**>(gen_data_free_list_tail);
return res;
}
void JITRT_GenJitDataFree(PyGenObject* gen) {
auto gen_data_footer =
reinterpret_cast<jit::GenDataFooter*>(gen->gi_jit_data);
auto gen_data = reinterpret_cast<uint64_t*>(gen_data_footer) -
gen_data_footer->spillWords;
if (gen_data_footer->spillWords != jit::kMinGenSpillWords ||
gen_data_free_list_size == kGenDataFreeListMaxSize) {
free(gen_data);
return;
}
if (gen_data_free_list_size) {
*reinterpret_cast<void**>(gen_data) = gen_data_free_list_tail;
}
gen_data_free_list_size++;
gen_data_free_list_tail = gen_data;
}
enum class MakeGenObjectMode {
kAsyncGenerator,
kCoroutine,
kGenerator,
};
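// Creates the generator / coroutine / async-generator object for a JITed
// function. In shadow-frame mode no PyFrameObject is allocated; otherwise a
// frame is allocated up front. In both cases the object is given JIT suspend
// data (spill space plus a GenDataFooter) describing how to resume it.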
template <MakeGenObjectMode mode>
static inline PyObject* make_gen_object(
GenResumeFunc resume_entry,
PyThreadState* tstate,
size_t spill_words,
jit::CodeRuntime* code_rt,
PyCodeObject* code) {
PyGenObject* gen = nullptr;
if (_PyJIT_ShadowFrame() || code->co_flags & CO_SHADOW_FRAME) {
if (mode == MakeGenObjectMode::kCoroutine) {
gen = reinterpret_cast<PyGenObject*>(_PyCoro_NewNoFrame(tstate, code));
} else if (mode == MakeGenObjectMode::kAsyncGenerator) {
gen = reinterpret_cast<PyGenObject*>(_PyAsyncGen_NewNoFrame(code));
} else {
gen = reinterpret_cast<PyGenObject*>(_PyGen_NewNoFrame(code));
}
} else {
PyFrameObject* f =
allocateFrame(tstate, code, code_rt->frameState()->globals());
// This clearing of f_back only when returning a generator matches
// CPython's generator handling in _PyEval_EvalCodeWithName; it also avoids
// keeping the parent frame alive longer than necessary if the caller
// finishes before the generator is resumed.
Py_CLEAR(f->f_back);
if (mode == MakeGenObjectMode::kCoroutine) {
gen = reinterpret_cast<PyGenObject*>(
_PyCoro_NewTstate(tstate, f, code->co_name, code->co_qualname));
PyFrameObject* parent_f = tstate->frame;
auto UTF8_name = PyUnicode_AsUTF8(parent_f->f_code->co_name);
if (!strcmp(UTF8_name, "<genexpr>") || !strcmp(UTF8_name, "<listcomp>") ||
!strcmp(UTF8_name, "<dictcomp>")) {
reinterpret_cast<PyCoroObject*>(gen)->creator = parent_f->f_back;
} else {
reinterpret_cast<PyCoroObject*>(gen)->creator = parent_f;
}
} else if (mode == MakeGenObjectMode::kAsyncGenerator) {
gen = reinterpret_cast<PyGenObject*>(
PyAsyncGen_New(f, code->co_name, code->co_qualname));
} else {
gen = reinterpret_cast<PyGenObject*>(
PyGen_NewWithQualName(f, code->co_name, code->co_qualname));
}
}
if (gen == nullptr) {
return nullptr;
}
gen->gi_shadow_frame.data = gen->gi_frame == nullptr
? _PyShadowFrame_MakeData(code_rt, PYSF_CODE_RT, PYSF_JIT)
: _PyShadowFrame_MakeData(gen->gi_frame, PYSF_PYFRAME, PYSF_JIT);
spill_words = std::max(spill_words, jit::kMinGenSpillWords);
auto suspend_data = gen_data_allocate(spill_words);
auto footer = reinterpret_cast<jit::GenDataFooter*>(
reinterpret_cast<uint64_t*>(suspend_data) + spill_words);
footer->resumeEntry = resume_entry;
footer->yieldPoint = nullptr;
footer->state = _PyJitGenState_JustStarted;
footer->gen = gen;
footer->code_rt = code_rt;
gen->gi_jit_data = reinterpret_cast<_PyJIT_GenData*>(footer);
return reinterpret_cast<PyObject*>(gen);
}
PyObject* JITRT_MakeGenObject(
GenResumeFunc resume_entry,
PyThreadState* tstate,
size_t spill_words,
jit::CodeRuntime* code_rt,
PyCodeObject* code) {
return make_gen_object<MakeGenObjectMode::kGenerator>(
resume_entry, tstate, spill_words, code_rt, code);
}
PyObject* JITRT_MakeGenObjectAsyncGen(
GenResumeFunc resume_entry,
PyThreadState* tstate,
size_t spill_words,
jit::CodeRuntime* code_rt,
PyCodeObject* code) {
return make_gen_object<MakeGenObjectMode::kAsyncGenerator>(
resume_entry, tstate, spill_words, code_rt, code);
}
PyObject* JITRT_MakeGenObjectCoro(
GenResumeFunc resume_entry,
PyThreadState* tstate,
size_t spill_words,
jit::CodeRuntime* code_rt,
PyCodeObject* code) {
return make_gen_object<MakeGenObjectMode::kCoroutine>(
resume_entry, tstate, spill_words, code_rt, code);
}
void JITRT_SetCurrentAwaiter(PyObject* awaitable, PyThreadState* ts) {
_PyShadowFrame* sf = ts->shadow_frame;
// TODO(bsimmers): This may need to change when we support eager evaluation
// of coroutines.
auto awaiter = reinterpret_cast<PyObject*>(_PyShadowFrame_GetGen(sf));
_PyAwaitable_SetAwaiter(awaitable, awaiter);
}
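// Drives one step of a YIELD_FROM / await. The second field of the result is
// nonzero when the delegation is finished (the sub-iterator returned, an
// error occurred, or finish_yield_from was requested) and zero when it
// yielded a value and should be resumed again.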
JITRT_YieldFromRes JITRT_YieldFrom(
PyObject* gen,
PyObject* v,
PyThreadState* tstate,
uint64_t finish_yield_from) {
if (v == NULL) {
return {NULL, 1};
}
if (finish_yield_from) {
Py_INCREF(v);
return {v, 1};
}
PyObject* retval;
auto gen_status = PyIter_Send(tstate, gen, v, &retval);
if (gen_status == PYGEN_RETURN) {
return {retval, 1};
}
if (gen_status == PYGEN_ERROR) {
return {NULL, 1};
}
JIT_DCHECK(gen_status == PYGEN_NEXT, "Unexpected gen_status: %d", gen_status);
return {retval, 0};
}
PyObject* JITRT_FormatValue(
PyThreadState* tstate,
PyObject* fmt_spec,
PyObject* value,
int conversion) {
PyObject* (*conv_fn)(PyObject*);
/* See if any conversion is specified. */
switch (conversion) {
case FVC_NONE:
conv_fn = NULL;
break;
case FVC_STR:
conv_fn = PyObject_Str;
break;
case FVC_REPR:
conv_fn = PyObject_Repr;
break;
case FVC_ASCII:
conv_fn = PyObject_ASCII;
break;
default:
_PyErr_Format(
tstate,
PyExc_SystemError,
"unexpected conversion flag %d",
conversion);
return NULL;
}
/* If there's a conversion function, call it and replace
value with that result. Otherwise, just use value,
without conversion. */
Ref<> converted;
if (conv_fn != NULL) {
converted = Ref<>::steal(conv_fn(value));
if (converted == nullptr) {
return nullptr;
}
value = converted.get();
}
/* If value is a unicode object, and there's no fmt_spec,
then we know the result of format(value) is value
itself. In that case, skip calling format(). I plan to
move this optimization in to PyObject_Format()
itself. */
if (PyUnicode_CheckExact(value) && fmt_spec == NULL) {
/* Do nothing, just return. */
Py_INCREF(value);
return value;
}
/* Actually call format(). */
return PyObject_Format(value, fmt_spec);
}
PyObject* JITRT_BuildString(
void* /*unused*/,
PyObject** args,
size_t nargsf,
void* /*unused*/) {
size_t nargs = PyVectorcall_NARGS(nargsf);
Ref<> empty = Ref<>::steal(PyUnicode_New(0, 0));
if (empty == nullptr) {
return nullptr;
}
return _PyUnicode_JoinArray(empty, args, nargs);
}
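// Entry point used for static calls to a function that has not been compiled
// yet. We first try to JIT it; on success the static entry point is cached
// and returned so the caller can re-dispatch directly. If compilation fails
// (or is not possible) we reconstruct a normal argument array from the
// trampoline's stack layout, box any primitive arguments, run the call
// through _PyObject_Vectorcall, and unbox a primitive return value if the
// signature requires it.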
JITRT_StaticCallReturn
JITRT_CompileFunction(PyFunctionObject* func, PyObject** args, bool* compiled) {
void* no_error = (void*)1;
if (_PyJIT_IsCompiled((PyObject*)func) ||
_PyJIT_CompileFunction(func) == PYJIT_RESULT_OK) {
*compiled = 1;
void** indirect =
jit::codegen::NativeGeneratorFactory::runtime()->findFunctionEntryCache(
func);
*indirect = (void*)JITRT_GET_STATIC_ENTRY(func->vectorcall);
return JITRT_StaticCallReturn{
(void*)JITRT_GET_STATIC_ENTRY(func->vectorcall), no_error};
}
*compiled = 0;
PyCodeObject* code = (PyCodeObject*)func->func_code;
int total_args = code->co_argcount;
if (code->co_flags & CO_VARARGS) {
total_args++;
}
if (code->co_flags & CO_VARKEYWORDS) {
total_args++;
}
// PyObject** args is:
// arg0
// arg1
// arg2
// arg3
// arg4
// arg5
// &compiled
// dummy
// previous rbp
// return address to JITed code
// memory argument 0
// memory argument 1
// ...
PyObject** dest_args;
PyObject* final_args[total_args];
if (total_args <= 6) {
// no gap in args to worry about
dest_args = args;
} else {
for (int i = 0; i < 6; i++) {
final_args[i] = args[i];
}
for (int i = 6; i < total_args; i++) {
final_args[i] = args[i + 4];
}
dest_args = final_args;
}
_PyTypedArgsInfo* arg_info = jit::codegen::NativeGeneratorFactory::runtime()
->findFunctionPrimitiveArgInfo(func);
PyObject* allocated_args[arg_info == nullptr ? 0 : Py_SIZE(arg_info)];
int allocated_count = 0;
if (arg_info != nullptr) {
// We have primitive values that need to be converted into boxed values
// to run the interpreter loop.
for (Py_ssize_t i = 0; i < Py_SIZE(arg_info); i++) {
if (arg_info->tai_args[i].tai_primitive_type != -1) {
// primitive type, box...
int arg = arg_info->tai_args[i].tai_argnum;
uint64_t arg_val;
if (arg >= 6) {
arg += 4;
}
arg_val = (uint64_t)args[arg];
PyTypeObject* arg_type = arg_info->tai_args[i].tai_type;
PyObject* new_val;
if (_PyClassLoader_IsEnum(arg_type)) {
new_val = JITRT_BoxEnum((int64_t)arg_val, (uint64_t)arg_type);
} else {
switch (arg_info->tai_args[i].tai_primitive_type) {
case TYPED_BOOL:
new_val = arg_val ? Py_True : Py_False;
break;
case TYPED_INT8:
new_val = PyLong_FromLong((int8_t)arg_val);
break;
case TYPED_INT16:
new_val = PyLong_FromLong((int16_t)arg_val);
break;
case TYPED_INT32:
new_val = PyLong_FromLong((int32_t)arg_val);
break;
case TYPED_INT64:
new_val = PyLong_FromSsize_t((Py_ssize_t)arg_val);
break;
case TYPED_UINT8:
new_val = PyLong_FromUnsignedLong((uint8_t)arg_val);
break;
case TYPED_UINT16:
new_val = PyLong_FromUnsignedLong((uint16_t)arg_val);
break;
case TYPED_UINT32:
new_val = PyLong_FromUnsignedLong((uint32_t)arg_val);
break;
case TYPED_UINT64:
new_val = PyLong_FromSize_t((size_t)arg_val);
break;
default:
assert(false);
PyErr_SetString(PyExc_RuntimeError, "unsupported primitive type");
new_val = nullptr;
}
}
if (new_val == nullptr) {
for (int i = 0; i < allocated_count; i++) {
Py_DECREF(allocated_args[i]);
}
return JITRT_StaticCallReturn{nullptr, nullptr};
}
// We can update the incoming arg array in place: it's either the values
// pushed onto the stack by the trampoline, or the final_args array we
// allocated above.
dest_args[arg] = new_val;
allocated_args[allocated_count++] = new_val;
}
}
}
PyObject* res =
_PyObject_Vectorcall((PyObject*)func, dest_args, total_args, NULL);
for (int i = 0; i < allocated_count; i++) {
Py_DECREF(allocated_args[i]);
}
// If there was an error, don't try to unbox null
if (res == nullptr) {
return JITRT_StaticCallReturn{res, nullptr};
}
// If we are supposed to be returning a primitive, it needs unboxing because
// our caller expected this to be a static->static direct invoke; we just
// failed to JIT the callee.
int optional, exact;
PyTypeObject* ret_type = _PyClassLoader_ResolveType(
_PyClassLoader_GetReturnTypeDescr(func), &optional, &exact);
if (_PyClassLoader_IsEnum(ret_type)) {
Py_DECREF(ret_type);
void* ival = (void*)JITRT_UnboxEnum(res);
return JITRT_StaticCallReturn{ival, no_error};
}
int ret_code = _PyClassLoader_GetTypeCode(ret_type);
Py_DECREF(ret_type);
if (ret_code != TYPED_OBJECT) {
// we can always unbox to 64-bit, the JIT will just ignore the higher bits.
// (TODO) This means that overflow here will give weird results, but
// overflow in primitive ints in static python is undefined behavior right
// now anyway, until we implement overflow checking. It doesn't make sense
// to implement overflow checking just here in the "unjitable" code path,
// when overflow won't be checked if the code is JITted.
void* ival;
if (ret_code == TYPED_BOOL) {
ival = (void*)(res == Py_True);
} else if (ret_code & TYPED_INT_SIGNED) {
ival = (void*)JITRT_UnboxI64(res);
} else {
ival = (void*)JITRT_UnboxU64(res);
}
return JITRT_StaticCallReturn{ival, no_error};
}
return JITRT_StaticCallReturn{res, no_error};
}
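// UNPACK_EX helper: consumes `iterable` into a tuple holding `before` leading
// values, then a list of the remaining middle values, then `after` trailing
// values popped off the end of that list.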
PyObject* JITRT_UnpackExToTuple(
PyThreadState* tstate,
PyObject* iterable,
int before,
int after) {
JIT_DCHECK(iterable != nullptr, "The iterable cannot be null.");
Ref<> it = Ref<>::steal(PyObject_GetIter(iterable));
if (it == NULL) {
if (_PyErr_ExceptionMatches(tstate, PyExc_TypeError) &&
iterable->ob_type->tp_iter == NULL && !PySequence_Check(iterable)) {
_PyErr_Format(
tstate,
PyExc_TypeError,
"cannot unpack non-iterable %.200s object",
iterable->ob_type->tp_name);
}
return nullptr;
}
int totalargs = before + after + 1;
Ref<PyTupleObject> tuple = Ref<PyTupleObject>::steal(PyTuple_New(totalargs));
if (tuple == nullptr) {
return nullptr;
}
int ti = 0;
for (int i = 0; i < before; i++) {
PyObject* w = PyIter_Next(it);
if (w == NULL) {
/* Iterator done, via error or exhaustion. */
if (!_PyErr_Occurred(tstate)) {
if (after == -1) {
_PyErr_Format(
tstate,
PyExc_ValueError,
"not enough values to unpack "
"(expected %d, got %d)",
before,
i);
} else {
_PyErr_Format(
tstate,
PyExc_ValueError,
"not enough values to unpack "
"(expected at least %d, got %d)",
before + after,
i);
}
}
return nullptr;
}
tuple->ob_item[ti++] = w;
}
JIT_DCHECK(
after >= 0,
"This function should only be used for UNPACK_EX, where after >= 0.");
PyObject* list = PySequence_List(it);
if (list == NULL) {
return nullptr;
}
tuple->ob_item[ti++] = list;
ssize_t list_size = PyList_GET_SIZE(list);
if (list_size < after) {
_PyErr_Format(
tstate,
PyExc_ValueError,
"not enough values to unpack (expected at least %d, got %zd)",
before + after,
before + list_size);
return nullptr;
}
/* Pop the "after-variable" args off the list. */
for (int j = after; j > 0; j--) {
tuple->ob_item[ti++] = PyList_GET_ITEM(list, list_size - j);
}
/* Resize the list. */
Py_SIZE(list) = list_size - after;
return reinterpret_cast<PyObject*>(tuple.release());
}
int JITRT_UnicodeEquals(PyObject* s1, PyObject* s2, int equals) {
// One of these must be unicode for the equality comparison to be okay.
assert(PyUnicode_CheckExact(s1) || PyUnicode_CheckExact(s2));
if (s1 == s2) {
return equals == Py_EQ;
}
if (PyUnicode_CheckExact(s1) && PyUnicode_CheckExact(s2)) {
if (PyUnicode_READY(s1) < 0 || PyUnicode_READY(s2) < 0)
return -1;
Py_ssize_t length = PyUnicode_GET_LENGTH(s1);
if (length != PyUnicode_GET_LENGTH(s2)) {
return equals == Py_NE;
}
Py_hash_t hash1 = ((PyASCIIObject*)s1)->hash;
Py_hash_t hash2 = ((PyASCIIObject*)s2)->hash;
if (hash1 != hash2 && hash1 != -1 && hash2 != -1) {
return equals == Py_NE;
}
int kind = PyUnicode_KIND(s1);
if (kind != PyUnicode_KIND(s2)) {
return equals == Py_NE;
}
void* data1 = PyUnicode_DATA(s1);
void* data2 = PyUnicode_DATA(s2);
if (PyUnicode_READ(kind, data1, 0) != PyUnicode_READ(kind, data2, 0)) {
return equals == Py_NE;
} else if (length == 1) {
return equals == Py_EQ;
} else {
int result = memcmp(data1, data2, (size_t)(length * kind));
return (equals == Py_EQ) ? (result == 0) : (result != 0);
}
}
return PyObject_RichCompareBool(s1, s2, equals);
}
int JITRT_NotContains(PyObject* w, PyObject* v) {
int res = PySequence_Contains(w, v);
if (res == -1) {
return -1;
}
return !res;
}
/* Perform a rich comparison with integer result. This wraps
PyObject_RichCompare(), returning -1 for error, 0 for false, 1 for true. */
int JITRT_RichCompareBool(PyObject* v, PyObject* w, int op) {
Ref<> res = Ref<>::steal(PyObject_RichCompare(v, w, op));
if (res == nullptr) {
return -1;
} else if (PyBool_Check(res)) {
return res == Py_True;
}
return PyObject_IsTrue(res);
}
/* Perform a batch decref on the objects in args. */
void JITRT_BatchDecref(PyObject** args, int nargs) {
for (int i = 0; i < nargs; i++) {
Py_DECREF(args[i]);
}
}