void NativeGenerator::generatePrologue(Label correct_arg_count, Label native_entry_point)

in Jit/codegen/gen_asm.cpp [573:849]


void NativeGenerator::generatePrologue(
    Label correct_arg_count,
    Label native_entry_point) {
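  // correct_arg_count is bound below once the incoming argument count has
  // been checked; native_entry_point is bound after frame linking and the
  // argument shuffle, just before stack space for the function body is set
  // up.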
  PyCodeObject* code = GetFunction()->code;

  // the generic entry point, including primitive return boxing if needed
  asmjit::BaseNode* entry_cursor = as_->cursor();

  // same as entry_cursor but only set if we are boxing a primitive return
  asmjit::BaseNode* box_entry_cursor = nullptr;

  // start of the "real" generic entry, after the return-boxing wrapper
  asmjit::BaseNode* generic_entry_cursor = nullptr;

  bool returns_primitive = func_->returnsPrimitive();
  bool returns_double = func_->returnsPrimitiveDouble();

  if (returns_primitive) {
    // If we return a primitive, then in the generic (non-static) entry path we
    // need to box it up (since our caller can't handle an actual primitive
    // return). We do this by generating a small wrapper "function" here that
    // just calls the real function and then boxes the return value before
    // returning.
    Label generic_entry = as_->newLabel();
    Label box_done = as_->newLabel();
    Label error = as_->newLabel();
    jit::hir::Type ret_type = func_->return_type;
    Annotations annot;
    uint64_t box_func;

    bool returns_enum = ret_type <= TCEnum;

    generateFunctionEntry();
    if (returns_enum) {
      as_->push(x86::rdx);
      as_->push(x86::rdx); // extra push to maintain alignment
      annot.add("saveRegisters", as_, entry_cursor);
    }
    as_->call(generic_entry);

    // If there was an error, there's nothing to box. The generic entry
    // signals errors through a flag register (edx, or xmm1 for doubles):
    // zero means an exception was raised.
    if (returns_double) {
      as_->ptest(x86::xmm1, x86::xmm1);
      as_->je(error);
    } else if (returns_enum) {
      as_->test(x86::edx, x86::edx);
      as_->je(error);
    } else {
      as_->test(x86::edx, x86::edx);
      as_->je(box_done);
    }
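    // Pick the boxing helper for the declared return type. Narrow signed
    // types are sign-extended and narrow unsigned types zero-extended into
    // the helper's argument register first.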

    if (ret_type <= TCBool) {
      as_->movzx(x86::edi, x86::al);
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxBool);
    } else if (ret_type <= TCInt8) {
      as_->movsx(x86::edi, x86::al);
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxI32);
    } else if (ret_type <= TCUInt8) {
      as_->movzx(x86::edi, x86::al);
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxU32);
    } else if (ret_type <= TCInt16) {
      as_->movsx(x86::edi, x86::ax);
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxI32);
    } else if (ret_type <= TCUInt16) {
      as_->movzx(x86::edi, x86::ax);
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxU32);
    } else if (ret_type <= TCInt32) {
      as_->mov(x86::edi, x86::eax);
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxI32);
    } else if (ret_type <= TCUInt32) {
      as_->mov(x86::edi, x86::eax);
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxU32);
    } else if (ret_type <= TCInt64) {
      as_->mov(x86::rdi, x86::rax);
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxI64);
    } else if (ret_type <= TCUInt64) {
      as_->mov(x86::rdi, x86::rax);
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxU64);
    } else if (returns_double) {
      // xmm0 already contains the return value
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxDouble);
    } else if (returns_enum) {
      as_->mov(x86::rdi, x86::rax);

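      // Recover the nargsf word saved at entry and test the
      // statically-invoked bit: static callers expect a plain boxed int,
      // while all other callers get a real enum instance via JITRT_BoxEnum.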
      Label box_int = as_->newLabel();
      as_->pop(x86::rdx);
      as_->pop(x86::rdx);
      as_->bt(x86::rdx, _Py_VECTORCALL_INVOKED_STATICALLY_BIT_POS);
      as_->jb(box_int);

      as_->mov(x86::rsi, reinterpret_cast<uint64_t>(ret_type.typeSpec()));
      as_->call(reinterpret_cast<uint64_t>(JITRT_BoxEnum));
      as_->jmp(box_done);

      as_->bind(box_int);
      box_func = reinterpret_cast<uint64_t>(JITRT_BoxI64);
    } else {
      JIT_CHECK(
          false, "unsupported primitive return type %s", ret_type.toString());
    }

    as_->call(box_func);

    as_->bind(box_done);
    as_->leave();
    as_->ret();

    if (returns_double) {
      as_->bind(error);
      as_->xor_(x86::rax, x86::rax);
      as_->leave();
      as_->ret();
    } else if (returns_enum) {
      as_->bind(error);
      as_->pop(x86::rdx);
      as_->pop(x86::rdx);
      as_->leave();
      as_->ret();
    }

    box_entry_cursor = entry_cursor;
    generic_entry_cursor = as_->cursor();
    as_->bind(generic_entry);
  } else {
    generic_entry_cursor = entry_cursor;
  }

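  // The generic entry proper: set up a C frame, then validate and bind the
  // incoming arguments before falling through to frame linking.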
  generateFunctionEntry();

  Label setup_frame = as_->newLabel();
  Label argCheck = as_->newLabel();

  if (code->co_flags & CO_STATICALLY_COMPILED) {
    // If we've been invoked statically we can skip all of the argument
    // checking because we know our args have been provided correctly.
    // However, if we have primitive args we need to unbox them from their
    // boxed representations. We usually get to avoid this by doing direct
    // invokes from JITed code.
    if (func_->has_primitive_args) {
      env_.code_rt->addReference(func_->prim_args_info);
      as_->mov(
          x86::r8, reinterpret_cast<uint64_t>(func_->prim_args_info.get()));
      if (func_->returnsPrimitiveDouble()) {
        as_->call(reinterpret_cast<uint64_t>(
            JITRT_CallStaticallyWithPrimitiveSignatureFP));
      } else {
        as_->call(reinterpret_cast<uint64_t>(
            JITRT_CallStaticallyWithPrimitiveSignature));
      }
      as_->leave();
      as_->ret();
    } else {
      as_->bt(x86::rdx, _Py_VECTORCALL_INVOKED_STATICALLY_BIT_POS);
      as_->jb(setup_frame);
    }
  }

  if (!func_->has_primitive_args) {
    as_->test(x86::rcx, x86::rcx); // test for kwargs
    if (!((code->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) ||
          code->co_kwonlyargcount)) {
      // If we have varargs or var kwargs we need to dispatch through our
      // helper regardless of whether kwargs were provided, so it can create
      // the varargs tuple and dict and free them on exit.
      //
      // Similarly, if the function has keyword-only args, we dispatch
      // through the helper to check that they were, in fact, passed via
      // keyword arguments.
      //
      // A lot of other things happen in the helper, so there is potentially
      // a lot of room for optimization here.
      as_->je(argCheck);
    }

    // We don't check the length of the kwnames tuple here; normal callers
    // will never pass an empty tuple. It is possible for unusual callers to
    // pass the empty tuple, in which case we'll just go through the slow
    // binding path.
    as_->call(reinterpret_cast<uint64_t>(JITRT_CallWithKeywordArgs));
    as_->leave();
    as_->ret();

    // check that we have a valid number of args
    if (!(code->co_flags & (CO_VARARGS | CO_VARKEYWORDS))) {
      as_->bind(argCheck);
      as_->cmp(x86::edx, GetFunction()->numArgs());

      // If the argument count is correct, skip ahead; otherwise call a
      // helper to either fix the args up with defaults or raise an
      // appropriate exception.
      as_->jz(correct_arg_count);
      as_->mov(x86::rcx, GetFunction()->numArgs());
      as_->call(
          (returns_double
               ? reinterpret_cast<uint64_t>(
                     JITRT_CallWithIncorrectArgcountFPReturn)
               : reinterpret_cast<uint64_t>(JITRT_CallWithIncorrectArgcount)));
      as_->leave();
      as_->ret();
    }
  }

  as_->bind(correct_arg_count);
  if (code->co_flags & CO_STATICALLY_COMPILED) {
    if (!func_->has_primitive_args) {
      // We weren't called statically, but we've now resolved
      // all arguments to fixed offsets.  Validate that the
      // arguments are correctly typed.
      generateStaticMethodTypeChecks(setup_frame);
    } else if (func_->has_primitive_first_arg) {
      as_->mov(x86::rdx, 0);
    }
  }

  env_.addAnnotation("Generic entry", generic_entry_cursor);

  if (box_entry_cursor) {
    env_.addAnnotation(
        "Generic entry (box primitive return)", box_entry_cursor);
  }

  // Args are now validated; set up the frame
  auto frame_cursor = as_->cursor();
  as_->bind(setup_frame);

  constexpr auto kNargsfReg = x86::rdx;
  constexpr auto kFuncPtrReg = x86::rax;
  constexpr auto kArgsReg = x86::r10;
  constexpr auto kArgsOverflowBaseReg = kArgsReg;

  loadOrGenerateLinkFrame(
      x86::r11,
      {
          {kNargsfReg, kNargsfReg},
          {x86::rdi, kFuncPtrReg}, // func
          {x86::rsi, kArgsReg} // args
      });
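  // After linking, func lives in rax and the args array in r10, leaving
  // rdi/rsi free for the argument shuffle below; nargsf stays in rdx.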

  // Move arguments into their expected registers and then use r10 as the base
  // for additional args. Note for coroutines we leave nargsf in RDX.
  size_t num_fp_regs = 0;
  size_t num_gp_regs = 0;
  size_t args_without_regs = 0;
  const bool not_enough_regs_for_args =
      forEachArgumentRegInfo([&](std::optional<x86::Reg> r, size_t i) {
        if (!r) {
          args_without_regs++;
          return;
        }
        x86::Mem arg_ptr =
            x86::ptr(kArgsReg, (i - args_without_regs) * kPointerSize);
        if (r->isXmm() && num_fp_regs != FP_ARGUMENT_REG_COUNT) {
          as_->movsd(x86::Xmm(r->id()), arg_ptr);
          num_fp_regs++;
        } else if (num_gp_regs != numGpRegsForArgs()) {
          as_->mov(x86::Gpq(r->id()), arg_ptr);
          num_gp_regs++;
        }
      });
  if (not_enough_regs_for_args) {
    // Load the location of the remaining args; the backend will deal with
    // loading them from here.
    as_->lea(
        kArgsOverflowBaseReg,
        x86::ptr(kArgsReg, (num_fp_regs + num_gp_regs) * kPointerSize));
  }

  // Finally, allocate the register-save space required for the actual
  // function
  auto native_entry_cursor = as_->cursor();
  as_->bind(native_entry_point);

  setupFrameAndSaveCallerRegisters(x86::r11);

  env_.addAnnotation("Link frame", frame_cursor);
  env_.addAnnotation("Native entry", native_entry_cursor);
}
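
For orientation, the boxing wrapper emitted at the top of this function behaves roughly like the C++ sketch below for an int64-returning function. This is an illustration only: realGenericEntry and its pair return are hypothetical stand-ins for the generated generic entry, which really passes the value in rax (xmm0 for doubles) and the error flag in edx (xmm1 for doubles).

// Sketch only: models the register-level protocol of the generated wrapper.
// All names except JITRT_BoxI64 are hypothetical.
#include <cstddef>
#include <cstdint>
#include <utility>

struct PyObject;
PyObject* JITRT_BoxI64(int64_t value); // runtime boxing helper used above

// Stand-in for the generated generic entry: returns the raw primitive
// (really in rax) plus a validity flag (really in edx; zero on error).
std::pair<int64_t, bool> realGenericEntry(PyObject** args, size_t nargsf);

PyObject* boxingWrapper(PyObject** args, size_t nargsf) {
  auto [value, ok] = realGenericEntry(args, nargsf);
  if (!ok) {
    return nullptr; // exception already set; nothing to box
  }
  return JITRT_BoxI64(value);
}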