in Jit/codegen/gen_asm.cpp [573:849]
void NativeGenerator::generatePrologue(
Label correct_arg_count,
Label native_entry_point) {
PyCodeObject* code = GetFunction()->code;
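// Register state on entry (as used by the checks below): rdi holds the
// function object, rsi the argument vector, rdx nargsf (the arg count plus
// flag bits such as the statically-invoked bit), and rcx kwnames.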
// the generic entry point, including primitive return boxing if needed
asmjit::BaseNode* entry_cursor = as_->cursor();
// same as entry_cursor but only set if we are boxing a primitive return
asmjit::BaseNode* box_entry_cursor = nullptr;
// start of the "real" generic entry, after the return-boxing wrapper
asmjit::BaseNode* generic_entry_cursor = nullptr;
bool returns_primitive = func_->returnsPrimitive();
bool returns_double = func_->returnsPrimitiveDouble();
if (returns_primitive) {
// If we return a primitive, then in the generic (non-static) entry path we
// need to box it up (since our caller can't handle an actual primitive
// return). We do this by generating a small wrapper "function" here that
// just calls the real function and then boxes the return value before
// returning.
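// As a rough sketch (illustrative pseudocode only; the real signature is
// the generic vectorcall entry's), the wrapper emitted below behaves like:
//
//   ret = generic_entry(...);          // primitive result in rax / xmm0
//   if (error_flag == 0) return NULL;  // edx (xmm1 for FP) flags errors
//   return JITRT_Box<T>(ret);          // e.g. JITRT_BoxI64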
Label generic_entry = as_->newLabel();
Label box_done = as_->newLabel();
Label error = as_->newLabel();
jit::hir::Type ret_type = func_->return_type;
Annotations annot;
uint64_t box_func;
bool returns_enum = ret_type <= TCEnum;
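// Note: operator<= on jit::hir::Type is a subtype test, so e.g.
// ret_type <= TCBool means "the return type is a subtype of primitive
// bool".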
generateFunctionEntry();
if (returns_enum) {
as_->push(x86::rdx);
as_->push(x86::rdx); // extra push to maintain alignment
annot.add("saveRegisters", as_, entry_cursor);
}
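// nargsf (rdx) is saved across the call because the enum boxing path
// below re-checks the statically-invoked bit after the call returns.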
as_->call(generic_entry);
// If there was an error, there's nothing to box: the primitive-return
// convention reports errors in a second register, with edx (xmm1 for FP
// returns) being zero when an exception is pending.
if (returns_double) {
as_->ptest(x86::xmm1, x86::xmm1);
as_->je(error);
} else if (returns_enum) {
as_->test(x86::edx, x86::edx);
as_->je(error);
} else {
as_->test(x86::edx, x86::edx);
as_->je(box_done);
}
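// Pick a boxing helper based on the declared return type: signed ints are
// sign-extended (movsx) and unsigned ints zero-extended (movzx) into the
// first argument register before the shared call to box_func below.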
if (ret_type <= TCBool) {
as_->movzx(x86::edi, x86::al);
box_func = reinterpret_cast<uint64_t>(JITRT_BoxBool);
} else if (ret_type <= TCInt8) {
as_->movsx(x86::edi, x86::al);
box_func = reinterpret_cast<uint64_t>(JITRT_BoxI32);
} else if (ret_type <= TCUInt8) {
as_->movzx(x86::edi, x86::al);
box_func = reinterpret_cast<uint64_t>(JITRT_BoxU32);
} else if (ret_type <= TCInt16) {
as_->movsx(x86::edi, x86::ax);
box_func = reinterpret_cast<uint64_t>(JITRT_BoxI32);
} else if (ret_type <= TCUInt16) {
as_->movzx(x86::edi, x86::ax);
box_func = reinterpret_cast<uint64_t>(JITRT_BoxU32);
} else if (ret_type <= TCInt32) {
as_->mov(x86::edi, x86::eax);
box_func = reinterpret_cast<uint64_t>(JITRT_BoxI32);
} else if (ret_type <= TCUInt32) {
as_->mov(x86::edi, x86::eax);
box_func = reinterpret_cast<uint64_t>(JITRT_BoxU32);
} else if (ret_type <= TCInt64) {
as_->mov(x86::rdi, x86::rax);
box_func = reinterpret_cast<uint64_t>(JITRT_BoxI64);
} else if (ret_type <= TCUInt64) {
as_->mov(x86::rdi, x86::rax);
box_func = reinterpret_cast<uint64_t>(JITRT_BoxU64);
} else if (returns_double) {
// xmm0 already contains the return value
box_func = reinterpret_cast<uint64_t>(JITRT_BoxDouble);
} else if (returns_enum) {
as_->mov(x86::rdi, x86::rax);
Label box_int = as_->newLabel();
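// Restore the saved nargsf and check the statically-invoked bit: static
// callers get the raw value boxed as a plain int (JITRT_BoxI64), while
// other callers get a full enum instance via JITRT_BoxEnum.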
as_->pop(x86::rdx);
as_->pop(x86::rdx);
as_->bt(x86::rdx, _Py_VECTORCALL_INVOKED_STATICALLY_BIT_POS);
as_->jb(box_int);
as_->mov(x86::rsi, reinterpret_cast<uint64_t>(ret_type.typeSpec()));
as_->call(reinterpret_cast<uint64_t>(JITRT_BoxEnum));
as_->jmp(box_done);
as_->bind(box_int);
box_func = reinterpret_cast<uint64_t>(JITRT_BoxI64);
} else {
JIT_CHECK(
false, "unsupported primitive return type %s", ret_type.toString());
}
as_->call(box_func);
as_->bind(box_done);
as_->leave();
as_->ret();
if (returns_double) {
as_->bind(error);
as_->xor_(x86::rax, x86::rax);
as_->leave();
as_->ret();
} else if (returns_enum) {
as_->bind(error);
as_->pop(x86::rdx);
as_->pop(x86::rdx);
as_->leave();
as_->ret();
}
box_entry_cursor = entry_cursor;
generic_entry_cursor = as_->cursor();
as_->bind(generic_entry);
} else {
generic_entry_cursor = entry_cursor;
}
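// The "real" generic entry begins here: a standard function entry followed
// by argument checking.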
generateFunctionEntry();
Label setup_frame = as_->newLabel();
Label argCheck = as_->newLabel();
if (code->co_flags & CO_STATICALLY_COMPILED) {
// If we've been invoked statically we can skip all of the
// argument checking because we know our args have been
// provided correctly. But if we have primitive args we need to
// unbox them from their boxed values. We usually get to
// avoid this by doing direct invokes from JIT-compiled code.
if (func_->has_primitive_args) {
env_.code_rt->addReference(func_->prim_args_info);
as_->mov(
x86::r8, reinterpret_cast<uint64_t>(func_->prim_args_info.get()));
if (func_->returnsPrimitiveDouble()) {
as_->call(reinterpret_cast<uint64_t>(
JITRT_CallStaticallyWithPrimitiveSignatureFP));
} else {
as_->call(reinterpret_cast<uint64_t>(
JITRT_CallStaticallyWithPrimitiveSignature));
}
as_->leave();
as_->ret();
} else {
as_->bt(x86::rdx, _Py_VECTORCALL_INVOKED_STATICALLY_BIT_POS);
as_->jb(setup_frame);
}
}
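// For the non-primitive-args case, the checks below implement roughly the
// following dispatch (a sketch; helper arguments are abbreviated):
//
//   if (kwnames != NULL || varargs || varkwargs || kwonly_args)
//     return JITRT_CallWithKeywordArgs(...);
//   if (nargs != numArgs())
//     return JITRT_CallWithIncorrectArgcount(...);  // or the FP variant
//   goto correct_arg_count;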
if (!func_->has_primitive_args) {
as_->test(x86::rcx, x86::rcx); // test for kwargs
if (!((code->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) ||
code->co_kwonlyargcount)) {
// If we have varargs or var kwargs we need to dispatch
// through our helper regardless of whether kwargs are provided,
// so that it can create the varargs tuple and dict and free them
// on exit.
//
// Similarly, if the function has keyword-only args, we dispatch
// through the helper to check that they were, in fact, passed via
// keyword arguments.
//
// A lot of other things happen in the helper, so there is
// potentially a lot of room for optimization here.
as_->je(argCheck);
}
// We don't check the length of the kwnames tuple here; normal callers
// will never pass an empty tuple. It is possible for odd callers to
// still pass the empty tuple, in which case we'll just go through the
// slow binding path.
as_->call(reinterpret_cast<uint64_t>(JITRT_CallWithKeywordArgs));
as_->leave();
as_->ret();
// check that we have a valid number of args
if (!(code->co_flags & (CO_VARARGS | CO_VARKEYWORDS))) {
as_->bind(argCheck);
as_->cmp(x86::edx, GetFunction()->numArgs());
// If we don't have the correct number of arguments, call a helper to
// either fix them up with defaults or raise an appropriate exception.
as_->jz(correct_arg_count);
as_->mov(x86::rcx, GetFunction()->numArgs());
as_->call(
(returns_double
? reinterpret_cast<uint64_t>(
JITRT_CallWithIncorrectArgcountFPReturn)
: reinterpret_cast<uint64_t>(JITRT_CallWithIncorrectArgcount)));
as_->leave();
as_->ret();
}
}
as_->bind(correct_arg_count);
if (code->co_flags & CO_STATICALLY_COMPILED) {
if (!func_->has_primitive_args) {
// We weren't called statically, but we've now resolved
// all arguments to fixed offsets. Validate that the
// arguments are correctly typed.
generateStaticMethodTypeChecks(setup_frame);
} else if (func_->has_primitive_first_arg) {
as_->mov(x86::rdx, 0);
}
}
env_.addAnnotation("Generic entry", generic_entry_cursor);
if (box_entry_cursor) {
env_.addAnnotation(
"Generic entry (box primitive return)", box_entry_cursor);
}
// Args are now validated; set up the frame
auto frame_cursor = as_->cursor();
as_->bind(setup_frame);
constexpr auto kNargsfReg = x86::rdx;
constexpr auto kFuncPtrReg = x86::rax;
constexpr auto kArgsReg = x86::r10;
constexpr auto kArgsOverflowBaseReg = kArgsReg;
loadOrGenerateLinkFrame(
x86::r11,
{
{kNargsfReg, kNargsfReg},
{x86::rdi, kFuncPtrReg}, // func
{x86::rsi, kArgsReg} // args
});
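// After the frame is linked, rax holds the function pointer and r10 the
// args vector, per the register mapping above.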
// Move arguments into their expected registers, then use r10 as the base
// for any additional args. Note that for coroutines we leave nargsf in RDX.
size_t num_fp_regs = 0;
size_t num_gp_regs = 0;
size_t args_without_regs = 0;
const bool not_enough_regs_for_args =
forEachArgumentRegInfo([&](std::optional<x86::Reg> r, size_t i) {
if (!r) {
args_without_regs++;
return;
}
x86::Mem arg_ptr =
x86::ptr(kArgsReg, (i - args_without_regs) * kPointerSize);
if (r->isXmm() && num_fp_regs != FP_ARGUMENT_REG_COUNT) {
as_->movsd(x86::Xmm(r->id()), arg_ptr);
num_fp_regs++;
} else if (num_gp_regs != numGpRegsForArgs()) {
as_->mov(x86::Gpq(r->id()), arg_ptr);
num_gp_regs++;
}
});
if (not_enough_regs_for_args) {
// load the location of the remaining args; the backend will deal with
// loading them from here...
as_->lea(
kArgsOverflowBaseReg,
x86::ptr(kArgsReg, (num_fp_regs + num_gp_regs) * kPointerSize));
}
// Finally, set up the frame and save the caller-saved registers required
// for the actual function
auto native_entry_cursor = as_->cursor();
as_->bind(native_entry_point);
setupFrameAndSaveCallerRegisters(x86::r11);
env_.addAnnotation("Link frame", frame_cursor);
env_.addAnnotation("Native entry", native_entry_cursor);
}