// erts/emulator/asmjit/x86/x86internal.cpp
// AsmJit - Machine code generation for C++
//
// * Official AsmJit Home Page: https://asmjit.com
// * Official Github Repository: https://github.com/asmjit/asmjit
//
// Copyright (c) 2008-2020 The AsmJit Authors
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgment in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#include "../core/api-build_p.h"
#ifdef ASMJIT_BUILD_X86
#include "../core/formatter.h"
#include "../core/string.h"
#include "../core/support.h"
#include "../core/type.h"
#include "../x86/x86internal_p.h"
// Can be used for debugging...
// #define ASMJIT_DUMP_ARGS_ASSIGNMENT
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
// ============================================================================
// [asmjit::X86Internal - Helpers]
// ============================================================================
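//! Returns the move instruction used to save/restore XMM registers in the
//! prolog/epilog - an aligned or unaligned move depending on
//! `FuncFrame::hasAlignedVecSR()`, with AVX or SSE encoding depending on
//! `FuncFrame::isAvxEnabled()`.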
static ASMJIT_INLINE uint32_t x86GetXmmMovInst(const FuncFrame& frame) {
bool avx = frame.isAvxEnabled();
bool aligned = frame.hasAlignedVecSR();
return aligned ? (avx ? Inst::kIdVmovaps : Inst::kIdMovaps)
: (avx ? Inst::kIdVmovups : Inst::kIdMovups);
}
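//! Maps a vector type-id to the narrowest vector register type able to hold
//! it (XMM for up to 128 bits, YMM for up to 256 bits, ZMM otherwise).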
static ASMJIT_INLINE uint32_t x86VecTypeIdToRegType(uint32_t typeId) noexcept {
return typeId <= Type::_kIdVec128End ? Reg::kTypeXmm :
typeId <= Type::_kIdVec256End ? Reg::kTypeYmm : Reg::kTypeZmm;
}
//! Converts `size` to a 'kmov?' instruction.
static inline uint32_t x86KmovFromSize(uint32_t size) noexcept {
switch (size) {
case 1: return Inst::kIdKmovb;
case 2: return Inst::kIdKmovw;
case 4: return Inst::kIdKmovd;
case 8: return Inst::kIdKmovq;
default: return Inst::kIdNone;
}
}
// ============================================================================
// [asmjit::X86Internal - FuncDetail]
// ============================================================================
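//! Splits a 64-bit value into two 32-bit values when targeting a 32-bit
//! architecture, so the pack describes a register pair instead of a single
//! register. Used for both return values and arguments.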
ASMJIT_FAVOR_SIZE void unpackValues(FuncDetail& func, FuncValuePack& pack) noexcept {
uint32_t typeId = pack[0].typeId();
switch (typeId) {
case Type::kIdI64:
case Type::kIdU64: {
if (Environment::is32Bit(func.callConv().arch())) {
// Convert a 64-bit return value to two 32-bit return values.
pack[0].initTypeId(Type::kIdU32);
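// NOTE: `typeId - 2` relies on the Type-id ordering in which kIdI64 - 2 ==
// kIdI32 and kIdU64 - 2 == kIdU32, so the second value keeps the signedness
// of the original 64-bit type.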
pack[1].initTypeId(typeId - 2);
break;
}
break;
}
}
}
ASMJIT_FAVOR_SIZE Error X86Internal::initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept {
const CallConv& cc = func.callConv();
uint32_t arch = cc.arch();
uint32_t stackOffset = cc._spillZoneSize;
uint32_t argCount = func.argCount();
// Up to two return values can be returned in GP registers.
static const uint8_t gpReturnIndexes[4] = {
uint8_t(Gp::kIdAx),
uint8_t(Gp::kIdDx),
uint8_t(BaseReg::kIdBad),
uint8_t(BaseReg::kIdBad)
};
if (func.hasRet()) {
unpackValues(func, func._rets);
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
uint32_t typeId = func._rets[valueIndex].typeId();
// Terminate at the first void type (end of the pack).
if (!typeId)
break;
switch (typeId) {
case Type::kIdI64:
case Type::kIdU64: {
if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
func._rets[valueIndex].initReg(Reg::kTypeGpq, gpReturnIndexes[valueIndex], typeId);
else
return DebugUtils::errored(kErrorInvalidState);
break;
}
case Type::kIdI8:
case Type::kIdI16:
case Type::kIdI32: {
if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
func._rets[valueIndex].initReg(Reg::kTypeGpd, gpReturnIndexes[valueIndex], Type::kIdI32);
else
return DebugUtils::errored(kErrorInvalidState);
break;
}
case Type::kIdU8:
case Type::kIdU16:
case Type::kIdU32: {
if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
func._rets[valueIndex].initReg(Reg::kTypeGpd, gpReturnIndexes[valueIndex], Type::kIdU32);
else
return DebugUtils::errored(kErrorInvalidState);
break;
}
case Type::kIdF32:
case Type::kIdF64: {
uint32_t regType = Environment::is32Bit(arch) ? Reg::kTypeSt : Reg::kTypeXmm;
func._rets[valueIndex].initReg(regType, valueIndex, typeId);
break;
}
case Type::kIdF80: {
// 80-bit floats are always returned by FP0.
func._rets[valueIndex].initReg(Reg::kTypeSt, valueIndex, typeId);
break;
}
case Type::kIdMmx32:
case Type::kIdMmx64: {
// MM registers are returned through XMM (SystemV) or GPQ (Win64).
uint32_t regType = Reg::kTypeMm;
uint32_t regIndex = valueIndex;
if (Environment::is64Bit(arch)) {
regType = cc.strategy() == CallConv::kStrategyDefault ? Reg::kTypeXmm : Reg::kTypeGpq;
regIndex = cc.strategy() == CallConv::kStrategyDefault ? valueIndex : gpReturnIndexes[valueIndex];
if (regIndex == BaseReg::kIdBad)
return DebugUtils::errored(kErrorInvalidState);
}
func._rets[valueIndex].initReg(regType, regIndex, typeId);
break;
}
default: {
func._rets[valueIndex].initReg(x86VecTypeIdToRegType(typeId), valueIndex, typeId);
break;
}
}
}
}
switch (cc.strategy()) {
case CallConv::kStrategyDefault: {
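// Hedged example, assuming the usual System V AMD64 passing order in
// `_passedOrder` (GP: RDI, RSI, RDX, RCX, R8, R9; VEC: XMM0..XMM7):
//
//   int f(int a, double b, int c)  ->  a: EDI, b: XMM0, c: ESI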
uint32_t gpzPos = 0;
uint32_t vecPos = 0;
for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
unpackValues(func, func._args[argIndex]);
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
FuncValue& arg = func._args[argIndex][valueIndex];
// Terminate if there are no more arguments in the pack.
if (!arg)
break;
uint32_t typeId = arg.typeId();
if (Type::isInt(typeId)) {
uint32_t regId = BaseReg::kIdBad;
if (gpzPos < CallConv::kMaxRegArgsPerGroup)
regId = cc._passedOrder[Reg::kGroupGp].id[gpzPos];
if (regId != BaseReg::kIdBad) {
uint32_t regType = (typeId <= Type::kIdU32) ? Reg::kTypeGpd : Reg::kTypeGpq;
arg.assignRegData(regType, regId);
func.addUsedRegs(Reg::kGroupGp, Support::bitMask(regId));
gpzPos++;
}
else {
uint32_t size = Support::max<uint32_t>(Type::sizeOf(typeId), registerSize);
arg.assignStackOffset(int32_t(stackOffset));
stackOffset += size;
}
continue;
}
if (Type::isFloat(typeId) || Type::isVec(typeId)) {
uint32_t regId = BaseReg::kIdBad;
if (vecPos < CallConv::kMaxRegArgsPerGroup)
regId = cc._passedOrder[Reg::kGroupVec].id[vecPos];
if (Type::isFloat(typeId)) {
// If this is a float, but `kFlagPassFloatsByVec` is false, we have
// to use the stack instead. This should only happen with 32-bit calling
// conventions.
if (!cc.hasFlag(CallConv::kFlagPassFloatsByVec))
regId = BaseReg::kIdBad;
}
else {
// Pass vector arguments via stack if this is a variable-arguments
// function. This should only happen with 32-bit calling conventions.
if (signature.hasVarArgs() && cc.hasFlag(CallConv::kFlagPassVecByStackIfVA))
regId = BaseReg::kIdBad;
}
if (regId != BaseReg::kIdBad) {
arg.initTypeId(typeId);
arg.assignRegData(x86VecTypeIdToRegType(typeId), regId);
func.addUsedRegs(Reg::kGroupVec, Support::bitMask(regId));
vecPos++;
}
else {
uint32_t size = Type::sizeOf(typeId);
arg.assignStackOffset(int32_t(stackOffset));
stackOffset += size;
}
continue;
}
}
}
break;
}
case CallConv::kStrategyX64Windows:
case CallConv::kStrategyX64VectorCall: {
// Both X64 and VectorCall behave similarly - arguments are indexed
// from left to right. The position of the argument determines in
// which register the argument is allocated, so it's either GP or
// one of XMM/YMM/ZMM registers.
//
//       [       X64       ] [VecCall]
// Index: #0   #1   #2   #3   #4   #5
//
// GP   : RCX  RDX  R8   R9
// VEC  : XMM0 XMM1 XMM2 XMM3 XMM4 XMM5
//
// For example function `f(int a, double b, int c, double d)` will be:
//
//        (a)  (b)  (c)  (d)
//        RCX  XMM1 R8   XMM3
//
// Unused vector registers are used by HVA (Homogeneous Vector Aggregate) arguments.
bool isVectorCall = (cc.strategy() == CallConv::kStrategyX64VectorCall);
for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
unpackValues(func, func._args[argIndex]);
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
FuncValue& arg = func._args[argIndex][valueIndex];
// Terminate if there are no more arguments in the pack.
if (!arg)
break;
uint32_t typeId = arg.typeId();
uint32_t size = Type::sizeOf(typeId);
if (Type::isInt(typeId) || Type::isMmx(typeId)) {
uint32_t regId = BaseReg::kIdBad;
if (argIndex < CallConv::kMaxRegArgsPerGroup)
regId = cc._passedOrder[Reg::kGroupGp].id[argIndex];
if (regId != BaseReg::kIdBad) {
uint32_t regType = (size <= 4 && !Type::isMmx(typeId)) ? Reg::kTypeGpd : Reg::kTypeGpq;
arg.assignRegData(regType, regId);
func.addUsedRegs(Reg::kGroupGp, Support::bitMask(regId));
}
else {
arg.assignStackOffset(int32_t(stackOffset));
stackOffset += 8;
}
continue;
}
if (Type::isFloat(typeId) || Type::isVec(typeId)) {
uint32_t regId = BaseReg::kIdBad;
if (argIndex < CallConv::kMaxRegArgsPerGroup)
regId = cc._passedOrder[Reg::kGroupVec].id[argIndex];
if (regId != BaseReg::kIdBad) {
// X64-ABI doesn't allow vector types (XMM|YMM|ZMM) to be passed
// via registers, however, VectorCall was designed for that purpose.
if (Type::isFloat(typeId) || isVectorCall) {
uint32_t regType = x86VecTypeIdToRegType(typeId);
arg.assignRegData(regType, regId);
func.addUsedRegs(Reg::kGroupVec, Support::bitMask(regId));
continue;
}
}
// Passed via stack if the argument is float/double or passed indirectly.
// The catch is that an indirectly passed argument still has its address
// passed in a GP register if one is available at the argument's index.
if (Type::isFloat(typeId)) {
arg.assignStackOffset(int32_t(stackOffset));
}
else {
uint32_t gpRegId = cc._passedOrder[Reg::kGroupGp].id[argIndex];
if (gpRegId != BaseReg::kIdBad)
arg.assignRegData(Reg::kTypeGpq, gpRegId);
else
arg.assignStackOffset(int32_t(stackOffset));
arg.addFlags(FuncValue::kFlagIsIndirect);
}
// Always 8 bytes (float/double/pointer).
stackOffset += 8;
continue;
}
}
}
break;
}
}
func._argStackSize = stackOffset;
return kErrorOk;
}
// ============================================================================
// [asmjit::X86FuncArgsContext]
// ============================================================================
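// Returns the signature of a register suitable as a temporary for a
// memory-to-memory move - a GP register (GPD/GPQ) when the data fits the
// native register size or both types are integers, otherwise an XMM/YMM/ZMM
// register chosen by size. A zero signature (RegInfo::isValid() == false)
// means no suitable register exists.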
static RegInfo x86GetRegForMemToMemMove(uint32_t arch, uint32_t dstTypeId, uint32_t srcTypeId) noexcept {
uint32_t dstSize = Type::sizeOf(dstTypeId);
uint32_t srcSize = Type::sizeOf(srcTypeId);
uint32_t maxSize = Support::max<uint32_t>(dstSize, srcSize);
uint32_t regSize = Environment::registerSizeFromArch(arch);
uint32_t signature = 0;
if (maxSize <= regSize || (Type::isInt(dstTypeId) && Type::isInt(srcTypeId)))
signature = maxSize <= 4 ? Gpd::kSignature : Gpq::kSignature;
else if (maxSize <= 16)
signature = Xmm::kSignature;
else if (maxSize <= 32)
signature = Ymm::kSignature;
else if (maxSize <= 64)
signature = Zmm::kSignature;
return RegInfo { signature };
}
// Used by both `argsToFuncFrame()` and `emitArgsAssignment()`.
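// For every assigned argument (and, if needed, the stack-arguments base
// register) the context keeps a `Var` describing where the value currently
// lives (`cur`) and where it must end up (`out`), plus per register-group
// `WorkData` bit-masks that drive the shuffling performed by
// `emitArgsAssignment()`.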
class X86FuncArgsContext {
public:
enum VarId : uint32_t {
kVarIdNone = 0xFF
};
//! Contains information about a single argument or SA register that may need shuffling.
struct Var {
inline void init(const FuncValue& cur_, const FuncValue& out_) noexcept {
cur = cur_;
out = out_;
}
//! Reset the value to its unassigned state.
inline void reset() noexcept {
cur.reset();
out.reset();
}
inline bool isDone() const noexcept { return cur.isDone(); }
inline void markDone() noexcept { cur.addFlags(FuncValue::kFlagIsDone); }
FuncValue cur;
FuncValue out;
};
struct WorkData {
inline void reset() noexcept {
_archRegs = 0;
_workRegs = 0;
_usedRegs = 0;
_assignedRegs = 0;
_dstRegs = 0;
_dstShuf = 0;
_numSwaps = 0;
_numStackArgs = 0;
memset(_reserved, 0, sizeof(_reserved));
memset(_physToVarId, kVarIdNone, 32);
}
inline bool isAssigned(uint32_t regId) const noexcept {
ASMJIT_ASSERT(regId < 32);
return Support::bitTest(_assignedRegs, regId);
}
inline void assign(uint32_t varId, uint32_t regId) noexcept {
ASMJIT_ASSERT(!isAssigned(regId));
ASMJIT_ASSERT(_physToVarId[regId] == kVarIdNone);
_physToVarId[regId] = uint8_t(varId);
_assignedRegs ^= Support::bitMask(regId);
}
inline void reassign(uint32_t varId, uint32_t newId, uint32_t oldId) noexcept {
ASMJIT_ASSERT( isAssigned(oldId));
ASMJIT_ASSERT(!isAssigned(newId));
ASMJIT_ASSERT(_physToVarId[oldId] == varId);
ASMJIT_ASSERT(_physToVarId[newId] == kVarIdNone);
_physToVarId[oldId] = uint8_t(kVarIdNone);
_physToVarId[newId] = uint8_t(varId);
_assignedRegs ^= Support::bitMask(newId) ^ Support::bitMask(oldId);
}
inline void swap(uint32_t aVarId, uint32_t aRegId, uint32_t bVarId, uint32_t bRegId) noexcept {
ASMJIT_ASSERT(isAssigned(aRegId));
ASMJIT_ASSERT(isAssigned(bRegId));
ASMJIT_ASSERT(_physToVarId[aRegId] == aVarId);
ASMJIT_ASSERT(_physToVarId[bRegId] == bVarId);
_physToVarId[aRegId] = uint8_t(bVarId);
_physToVarId[bRegId] = uint8_t(aVarId);
}
inline void unassign(uint32_t varId, uint32_t regId) noexcept {
ASMJIT_ASSERT(isAssigned(regId));
ASMJIT_ASSERT(_physToVarId[regId] == varId);
DebugUtils::unused(varId);
_physToVarId[regId] = uint8_t(kVarIdNone);
_assignedRegs ^= Support::bitMask(regId);
}
inline uint32_t archRegs() const noexcept { return _archRegs; }
inline uint32_t workRegs() const noexcept { return _workRegs; }
inline uint32_t usedRegs() const noexcept { return _usedRegs; }
inline uint32_t assignedRegs() const noexcept { return _assignedRegs; }
inline uint32_t dstRegs() const noexcept { return _dstRegs; }
inline uint32_t availableRegs() const noexcept { return _workRegs & ~_assignedRegs; }
uint32_t _archRegs; //!< All allocable registers provided by the architecture.
uint32_t _workRegs; //!< All registers that can be used by the shuffler.
uint32_t _usedRegs; //!< Registers used by the shuffler (all).
uint32_t _assignedRegs; //!< Assigned registers.
uint32_t _dstRegs; //!< Destination registers assigned to arguments or SA.
uint32_t _dstShuf; //!< Destination registers that require shuffling.
uint8_t _numSwaps; //!< Number of register swaps.
uint8_t _numStackArgs; //!< Number of stack loads.
uint8_t _reserved[6]; //!< Reserved (only used as padding).
uint8_t _physToVarId[32]; //!< Physical ID to variable ID mapping.
};
uint8_t _arch;
bool _hasStackSrc; //!< Has arguments passed via stack (SRC).
bool _hasPreservedFP; //!< Has preserved frame-pointer (FP).
uint8_t _stackDstMask; //!< Has arguments assigned to stack (DST).
uint8_t _regSwapsMask; //!< Register swap groups (bit-mask).
uint8_t _saVarId;
uint32_t _varCount;
WorkData _workData[BaseReg::kGroupVirt];
Var _vars[Globals::kMaxFuncArgs + 1];
X86FuncArgsContext() noexcept;
inline uint32_t arch() const noexcept { return _arch; }
inline uint32_t varCount() const noexcept { return _varCount; }
inline Var& var(size_t varId) noexcept { return _vars[varId]; }
inline const Var& var(size_t varId) const noexcept { return _vars[varId]; }
inline size_t indexOf(const Var* var) const noexcept { return (size_t)(var - _vars); }
Error initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args) noexcept;
Error markScratchRegs(FuncFrame& frame) noexcept;
Error markDstRegsDirty(FuncFrame& frame) noexcept;
Error markStackArgsReg(FuncFrame& frame) noexcept;
};
X86FuncArgsContext::X86FuncArgsContext() noexcept {
_arch = Environment::kArchUnknown;
_varCount = 0;
_hasStackSrc = false;
_hasPreservedFP = false;
_stackDstMask = 0;
_regSwapsMask = 0;
_saVarId = kVarIdNone;
for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++)
_workData[group].reset();
}
ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args) noexcept {
// The code has to be updated if this changes.
ASMJIT_ASSERT(BaseReg::kGroupVirt == 4);
const FuncDetail& func = *args.funcDetail();
// Initialize Architecture.
uint32_t arch = func.callConv().arch();
uint32_t archRegCount = Environment::is32Bit(arch) ? 8 : 16;
_arch = uint8_t(arch);
// Initialize `_archRegs`.
_workData[Reg::kGroupGp ]._archRegs = Support::lsbMask<uint32_t>(archRegCount) & ~Support::bitMask(Gp::kIdSp);
_workData[Reg::kGroupVec ]._archRegs = Support::lsbMask<uint32_t>(archRegCount);
_workData[Reg::kGroupMm ]._archRegs = Support::lsbMask<uint32_t>(8);
_workData[Reg::kGroupKReg]._archRegs = Support::lsbMask<uint32_t>(8);
if (frame.hasPreservedFP())
_workData[Reg::kGroupGp]._archRegs &= ~Support::bitMask(Gp::kIdBp);
// Extract information from all function arguments/assignments and build Var[] array.
uint32_t varId = 0;
for (uint32_t argIndex = 0; argIndex < Globals::kMaxFuncArgs; argIndex++) {
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
const FuncValue& dst_ = args.arg(argIndex, valueIndex);
if (!dst_.isAssigned())
continue;
const FuncValue& src_ = func.arg(argIndex, valueIndex);
if (ASMJIT_UNLIKELY(!src_.isAssigned()))
return DebugUtils::errored(kErrorInvalidState);
Var& var = _vars[varId];
var.init(src_, dst_);
FuncValue& src = var.cur;
FuncValue& dst = var.out;
uint32_t dstGroup = 0xFFFFFFFFu;
uint32_t dstId = BaseReg::kIdBad;
WorkData* dstWd = nullptr;
// Not supported.
if (src.isIndirect())
return DebugUtils::errored(kErrorInvalidAssignment);
if (dst.isReg()) {
uint32_t dstType = dst.regType();
if (ASMJIT_UNLIKELY(dstType >= Reg::kTypeCount))
return DebugUtils::errored(kErrorInvalidRegType);
// Copy TypeId from source if the destination doesn't have it. The RA
// used by BaseCompiler would never leave TypeId undefined, but users
// of FuncAPI can just assign phys regs without specifying the type.
if (!dst.hasTypeId())
dst.setTypeId(Reg::typeIdOf(dst.regType()));
dstGroup = Reg::groupOf(dstType);
if (ASMJIT_UNLIKELY(dstGroup >= BaseReg::kGroupVirt))
return DebugUtils::errored(kErrorInvalidRegGroup);
dstWd = &_workData[dstGroup];
dstId = dst.regId();
if (ASMJIT_UNLIKELY(dstId >= 32 || !Support::bitTest(dstWd->archRegs(), dstId)))
return DebugUtils::errored(kErrorInvalidPhysId);
if (ASMJIT_UNLIKELY(Support::bitTest(dstWd->dstRegs(), dstId)))
return DebugUtils::errored(kErrorOverlappedRegs);
dstWd->_dstRegs |= Support::bitMask(dstId);
dstWd->_dstShuf |= Support::bitMask(dstId);
dstWd->_usedRegs |= Support::bitMask(dstId);
}
else {
if (!dst.hasTypeId())
dst.setTypeId(src.typeId());
RegInfo regInfo = x86GetRegForMemToMemMove(arch, dst.typeId(), src.typeId());
if (ASMJIT_UNLIKELY(!regInfo.isValid()))
return DebugUtils::errored(kErrorInvalidState);
_stackDstMask = uint8_t(_stackDstMask | Support::bitMask(regInfo.group()));
}
if (src.isReg()) {
uint32_t srcId = src.regId();
uint32_t srcGroup = Reg::groupOf(src.regType());
if (dstGroup == srcGroup) {
dstWd->assign(varId, srcId);
// The best case, register is allocated where it is expected to be.
if (dstId == srcId)
var.markDone();
}
else {
if (ASMJIT_UNLIKELY(srcGroup >= BaseReg::kGroupVirt))
return DebugUtils::errored(kErrorInvalidState);
WorkData& srcData = _workData[srcGroup];
srcData.assign(varId, srcId);
}
}
else {
if (dstWd)
dstWd->_numStackArgs++;
_hasStackSrc = true;
}
varId++;
}
}
// Initialize WorkData::workRegs.
uint32_t i;
for (i = 0; i < BaseReg::kGroupVirt; i++)
_workData[i]._workRegs = (_workData[i].archRegs() & (frame.dirtyRegs(i) | ~frame.preservedRegs(i))) | _workData[i].dstRegs() | _workData[i].assignedRegs();
// Create a variable that represents `SARegId` if necessary.
bool saRegRequired = _hasStackSrc && frame.hasDynamicAlignment() && !frame.hasPreservedFP();
WorkData& gpRegs = _workData[BaseReg::kGroupGp];
uint32_t saCurRegId = frame.saRegId();
uint32_t saOutRegId = args.saRegId();
if (saCurRegId != BaseReg::kIdBad) {
// Check if the provided `SARegId` doesn't collide with input registers.
if (ASMJIT_UNLIKELY(gpRegs.isAssigned(saCurRegId)))
return DebugUtils::errored(kErrorOverlappedRegs);
}
if (saOutRegId != BaseReg::kIdBad) {
// Check if the provided `SARegId` doesn't collide with argument assignments.
if (ASMJIT_UNLIKELY(Support::bitTest(gpRegs.dstRegs(), saOutRegId)))
return DebugUtils::errored(kErrorOverlappedRegs);
saRegRequired = true;
}
if (saRegRequired) {
uint32_t ptrTypeId = Environment::is32Bit(arch) ? Type::kIdU32 : Type::kIdU64;
uint32_t ptrRegType = Environment::is32Bit(arch) ? BaseReg::kTypeGp32 : BaseReg::kTypeGp64;
_saVarId = uint8_t(varId);
_hasPreservedFP = frame.hasPreservedFP();
Var& var = _vars[varId];
var.reset();
if (saCurRegId == BaseReg::kIdBad) {
if (saOutRegId != BaseReg::kIdBad && !gpRegs.isAssigned(saOutRegId)) {
saCurRegId = saOutRegId;
}
else {
uint32_t availableRegs = gpRegs.availableRegs();
if (!availableRegs)
availableRegs = gpRegs.archRegs() & ~gpRegs.workRegs();
if (ASMJIT_UNLIKELY(!availableRegs))
return DebugUtils::errored(kErrorNoMorePhysRegs);
saCurRegId = Support::ctz(availableRegs);
}
}
var.cur.initReg(ptrRegType, saCurRegId, ptrTypeId);
gpRegs.assign(varId, saCurRegId);
gpRegs._workRegs |= Support::bitMask(saCurRegId);
if (saOutRegId != BaseReg::kIdBad) {
var.out.initReg(ptrRegType, saOutRegId, ptrTypeId);
gpRegs._dstRegs |= Support::bitMask(saOutRegId);
gpRegs._workRegs |= Support::bitMask(saOutRegId);
}
else {
var.markDone();
}
varId++;
}
_varCount = varId;
// Detect register swaps.
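// Example (hypothetical assignment): if one argument currently lives in RCX
// but must end up in RDX while another lives in RDX and must end up in RCX,
// the pair forms a swap. `_numSwaps` / `_regSwapsMask` record it so that
// either 'xchg' (GP registers) or a scratch register (other groups) can be
// used later.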
for (varId = 0; varId < _varCount; varId++) {
Var& var = _vars[varId];
if (var.cur.isReg() && var.out.isReg()) {
uint32_t srcId = var.cur.regId();
uint32_t dstId = var.out.regId();
uint32_t group = Reg::groupOf(var.cur.regType());
if (group != Reg::groupOf(var.out.regType()))
continue;
WorkData& wd = _workData[group];
if (wd.isAssigned(dstId)) {
Var& other = _vars[wd._physToVarId[dstId]];
if (Reg::groupOf(other.out.regType()) == group && other.out.regId() == srcId) {
wd._numSwaps++;
_regSwapsMask = uint8_t(_regSwapsMask | Support::bitMask(group));
}
}
}
}
return kErrorOk;
}
ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markDstRegsDirty(FuncFrame& frame) noexcept {
for (uint32_t i = 0; i < BaseReg::kGroupVirt; i++) {
WorkData& wd = _workData[i];
uint32_t regs = wd.usedRegs() | wd._dstShuf;
wd._workRegs |= regs;
frame.addDirtyRegs(i, regs);
}
return kErrorOk;
}
ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markScratchRegs(FuncFrame& frame) noexcept {
uint32_t groupMask = 0;
// Handle stack to stack moves.
groupMask |= _stackDstMask;
// Handle register swaps.
groupMask |= _regSwapsMask & ~Support::bitMask(BaseReg::kGroupGp);
if (!groupMask)
return kErrorOk;
// Selects one dirty register per affected group that can be used as a scratch register.
for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
if (Support::bitTest(groupMask, group)) {
WorkData& wd = _workData[group];
// Initially, pick some clobbered or dirty register.
uint32_t workRegs = wd.workRegs();
uint32_t regs = workRegs & ~(wd.usedRegs() | wd._dstShuf);
// If that didn't work out pick some register which is not in 'used'.
if (!regs)
regs = workRegs & ~wd.usedRegs();
// If that didn't work out pick any other register that is allocable.
// This last resort case will, however, result in marking one more
// register dirty.
if (!regs)
regs = wd.archRegs() & ~workRegs;
// If that didn't work out we will have to use XORs instead of MOVs.
if (!regs)
continue;
uint32_t regMask = Support::blsi(regs);
wd._workRegs |= regMask;
frame.addDirtyRegs(group, regMask);
}
}
return kErrorOk;
}
ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markStackArgsReg(FuncFrame& frame) noexcept {
if (_saVarId != kVarIdNone) {
const Var& var = _vars[_saVarId];
frame.setSARegId(var.cur.regId());
}
else if (frame.hasPreservedFP()) {
// Always EBP|RBP if the frame-pointer isn't omitted.
frame.setSARegId(Gp::kIdBp);
}
return kErrorOk;
}
// ============================================================================
// [asmjit::X86Internal - FrameLayout]
// ============================================================================
ASMJIT_FAVOR_SIZE Error X86Internal::initFuncFrame(FuncFrame& frame, const FuncDetail& func) noexcept {
uint32_t arch = func.callConv().arch();
// Initializing FuncFrame means making a copy of some properties of `func`.
// Properties like `_localStackSize` will be set by the user before the frame
// is finalized.
frame.reset();
frame._arch = uint8_t(arch);
frame._spRegId = Gp::kIdSp;
frame._saRegId = Gp::kIdBad;
uint32_t naturalStackAlignment = func.callConv().naturalStackAlignment();
uint32_t minDynamicAlignment = Support::max<uint32_t>(naturalStackAlignment, 16);
if (minDynamicAlignment == naturalStackAlignment)
minDynamicAlignment <<= 1;
frame._naturalStackAlignment = uint8_t(naturalStackAlignment);
frame._minDynamicAlignment = uint8_t(minDynamicAlignment);
frame._redZoneSize = uint8_t(func.redZoneSize());
frame._spillZoneSize = uint8_t(func.spillZoneSize());
frame._finalStackAlignment = uint8_t(frame._naturalStackAlignment);
if (func.hasFlag(CallConv::kFlagCalleePopsStack)) {
frame._calleeStackCleanup = uint16_t(func.argStackSize());
}
// Initial masks of dirty and preserved registers.
for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
frame._dirtyRegs[group] = func.usedRegs(group);
frame._preservedRegs[group] = func.preservedRegs(group);
}
// Exclude ESP/RSP - this register is never included in saved GP regs.
frame._preservedRegs[BaseReg::kGroupGp] &= ~Support::bitMask(Gp::kIdSp);
return kErrorOk;
}
ASMJIT_FAVOR_SIZE Error X86Internal::finalizeFuncFrame(FuncFrame& frame) noexcept {
uint32_t registerSize = Environment::registerSizeFromArch(frame.arch());
// The final stack alignment must account for the call and local stack alignments.
uint32_t stackAlignment = frame._finalStackAlignment;
ASMJIT_ASSERT(stackAlignment == Support::max(frame._naturalStackAlignment,
frame._callStackAlignment,
frame._localStackAlignment));
// TODO: Must be configurable.
uint32_t vecSize = 16;
bool hasFP = frame.hasPreservedFP();
bool hasDA = frame.hasDynamicAlignment();
// Include EBP|RBP if the function preserves the frame-pointer.
if (hasFP)
frame._dirtyRegs[Reg::kGroupGp] |= Support::bitMask(Gp::kIdBp);
// These two are identical if the function doesn't align its stack dynamically.
uint32_t saRegId = frame.saRegId();
if (saRegId == BaseReg::kIdBad)
saRegId = Gp::kIdSp;
// Fix stack arguments base-register from ESP|RSP to EBP|RBP in case it was
// not picked before and the function performs dynamic stack alignment.
if (hasDA && saRegId == Gp::kIdSp)
saRegId = Gp::kIdBp;
// Mark as dirty any register but ESP|RSP if used as SA pointer.
if (saRegId != Gp::kIdSp)
frame._dirtyRegs[Reg::kGroupGp] |= Support::bitMask(saRegId);
frame._spRegId = uint8_t(Gp::kIdSp);
frame._saRegId = uint8_t(saRegId);
// Setup stack size used to save preserved registers.
frame._gpSaveSize = uint16_t(Support::popcnt(frame.savedRegs(Reg::kGroupGp )) * registerSize);
frame._nonGpSaveSize = uint16_t(Support::popcnt(frame.savedRegs(Reg::kGroupVec )) * vecSize +
Support::popcnt(frame.savedRegs(Reg::kGroupMm )) * 8 +
Support::popcnt(frame.savedRegs(Reg::kGroupKReg)) * 8);
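// Hedged worked example (x86-64: registerSize=8, stackAlignment=16, no
// preserved FP, no dynamic alignment, callStackSize=0, localStackSize=32,
// two saved GP registers => gpSaveSize=16, nonGpSaveSize=0):
//
//   v = 0 -> localStackOffset = 0 -> v = 32 -> nonGpSaveOffset = 32
//   padding = alignUpDiff(32 + 16 + 8, 16) = 8 -> v = 40
//   gpSaveOffset = stackAdjustment = 40, then v = 40 + 16 + 8 = 64
//   saOffsetFromSP = 64
//
// i.e. the prolog pushes two registers and executes 'sub rsp, 40'; stack
// arguments then start at [rsp + 64] and RSP stays 16-byte aligned in the
// function body.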
uint32_t v = 0; // The beginning of the stack frame relative to SP after prolog.
v += frame.callStackSize(); // Count 'callStackSize' <- This is used to call functions.
v = Support::alignUp(v, stackAlignment); // Align to function's stack alignment.
frame._localStackOffset = v; // Store 'localStackOffset' <- Function's local stack starts here.
v += frame.localStackSize(); // Count 'localStackSize' <- Function's local stack ends here.
// If the function's stack must be aligned, calculate the alignment necessary
// to store vector registers, and set `FuncFrame::kAttrAlignedVecSR` to inform
// PEI that it can use instructions that perform aligned stores/loads.
if (stackAlignment >= vecSize && frame._nonGpSaveSize) {
frame.addAttributes(FuncFrame::kAttrAlignedVecSR);
v = Support::alignUp(v, vecSize); // Align '_nonGpSaveOffset'.
}
frame._nonGpSaveOffset = v; // Store '_nonGpSaveOffset' <- Non-GP Save/Restore starts here.
v += frame._nonGpSaveSize; // Count '_nonGpSaveSize' <- Non-GP Save/Restore ends here.
// Calculate if dynamic alignment (DA) slot (stored as offset relative to SP) is required and its offset.
if (hasDA && !hasFP) {
frame._daOffset = v; // Store 'daOffset' <- DA pointer would be stored here.
v += registerSize; // Count 'daOffset'.
}
else {
frame._daOffset = FuncFrame::kTagInvalidOffset;
}
// The return address should be stored after GP save/restore regs. It has
// the same size as `registerSize` (basically the native register/pointer
// size). We don't adjust it now as `v` now contains the exact size that the
// function requires to adjust (call frame + stack frame, vec stack size).
// The stack (if we consider this size) is misaligned now, as it's always
// aligned before the function call - when `call()` is executed it pushes
// the current EIP|RIP onto the stack, and misaligns it by 12 or 8 bytes
// (depending on the architecture). So count number of bytes needed to align
// it up to the function's CallFrame (the beginning).
if (v || frame.hasFuncCalls())
v += Support::alignUpDiff(v + frame.gpSaveSize() + registerSize, stackAlignment);
frame._gpSaveOffset = v; // Store 'gpSaveOffset' <- Function's GP Save/Restore starts here.
frame._stackAdjustment = v; // Store 'stackAdjustment' <- SA used by 'add zsp, SA' and 'sub zsp, SA'.
v += frame._gpSaveSize; // Count 'gpSaveSize' <- Function's GP Save/Restore ends here.
v += registerSize; // Count 'ReturnAddress' <- As CALL pushes onto stack.
// If the function performs dynamic stack alignment then the stack-adjustment must be aligned.
if (hasDA)
frame._stackAdjustment = Support::alignUp(frame._stackAdjustment, stackAlignment);
uint32_t saInvOff = FuncFrame::kTagInvalidOffset;
uint32_t saTmpOff = registerSize + frame._gpSaveSize;
// Calculate where the function arguments start relative to SP.
frame._saOffsetFromSP = hasDA ? saInvOff : v;
// Calculate where the function arguments start relative to FP or user-provided register.
frame._saOffsetFromSA = hasFP ? registerSize * 2 // Return address + frame pointer.
: saTmpOff; // Return address + all saved GP regs.
return kErrorOk;
}
// ============================================================================
// [asmjit::X86Internal - ArgsToFrameInfo]
// ============================================================================
ASMJIT_FAVOR_SIZE Error X86Internal::argsToFuncFrame(const FuncArgsAssignment& args, FuncFrame& frame) noexcept {
X86FuncArgsContext ctx;
ASMJIT_PROPAGATE(ctx.initWorkData(frame, args));
ASMJIT_PROPAGATE(ctx.markDstRegsDirty(frame));
ASMJIT_PROPAGATE(ctx.markScratchRegs(frame));
ASMJIT_PROPAGATE(ctx.markStackArgsReg(frame));
return kErrorOk;
}
// ============================================================================
// [asmjit::X86Internal - Emit Helpers]
// ============================================================================
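// Emits a single move of `typeId` data between `dst_` and `src_` (register
// or memory), selecting 'mov'/'movzx', MMX/vector moves, or 'kmov?' based on
// the type. No conversion is performed - both operands are expected to hold
// the same type.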
ASMJIT_FAVOR_SIZE Error X86Internal::emitRegMove(Emitter* emitter,
const Operand_& dst_,
const Operand_& src_, uint32_t typeId, bool avxEnabled, const char* comment) {
// Invalid or abstract TypeIds are not allowed.
ASMJIT_ASSERT(Type::isValid(typeId) && !Type::isAbstract(typeId));
Operand dst(dst_);
Operand src(src_);
uint32_t instId = Inst::kIdNone;
uint32_t memFlags = 0;
uint32_t overrideMemSize = 0;
enum MemFlags : uint32_t {
kDstMem = 0x1,
kSrcMem = 0x2
};
// Detect memory operands and patch them to have the same size as the register.
// BaseCompiler always sets memory size of allocs and spills, so this shouldn't
// really be necessary; however, since this function was separated from Compiler
// it's better to make sure that the size is always specified, as 'movzx' and
// 'movsx' rely on it.
if (dst.isMem()) { memFlags |= kDstMem; dst.as<Mem>().setSize(src.size()); }
if (src.isMem()) { memFlags |= kSrcMem; src.as<Mem>().setSize(dst.size()); }
switch (typeId) {
case Type::kIdI8:
case Type::kIdU8:
case Type::kIdI16:
case Type::kIdU16:
// Special case - 'movzx' load.
if (memFlags & kSrcMem) {
instId = Inst::kIdMovzx;
dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
}
else if (!memFlags) {
// Change both destination and source registers to GPD (safer, no dependencies).
dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
}
ASMJIT_FALLTHROUGH;
case Type::kIdI32:
case Type::kIdU32:
case Type::kIdI64:
case Type::kIdU64:
instId = Inst::kIdMov;
break;
case Type::kIdMmx32:
instId = Inst::kIdMovd;
if (memFlags) break;
ASMJIT_FALLTHROUGH;
case Type::kIdMmx64 : instId = Inst::kIdMovq ; break;
case Type::kIdMask8 : instId = Inst::kIdKmovb; break;
case Type::kIdMask16: instId = Inst::kIdKmovw; break;
case Type::kIdMask32: instId = Inst::kIdKmovd; break;
case Type::kIdMask64: instId = Inst::kIdKmovq; break;
default: {
uint32_t elementTypeId = Type::baseOf(typeId);
if (Type::isVec32(typeId) && memFlags) {
overrideMemSize = 4;
if (elementTypeId == Type::kIdF32)
instId = avxEnabled ? Inst::kIdVmovss : Inst::kIdMovss;
else
instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
break;
}
if (Type::isVec64(typeId) && memFlags) {
overrideMemSize = 8;
if (elementTypeId == Type::kIdF64)
instId = avxEnabled ? Inst::kIdVmovsd : Inst::kIdMovsd;
else
instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
break;
}
if (elementTypeId == Type::kIdF32)
instId = avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
else if (elementTypeId == Type::kIdF64)
instId = avxEnabled ? Inst::kIdVmovapd : Inst::kIdMovapd;
else if (typeId <= Type::_kIdVec256End)
instId = avxEnabled ? Inst::kIdVmovdqa : Inst::kIdMovdqa;
else if (elementTypeId <= Type::kIdU32)
instId = Inst::kIdVmovdqa32;
else
instId = Inst::kIdVmovdqa64;
break;
}
}
if (!instId)
return DebugUtils::errored(kErrorInvalidState);
if (overrideMemSize) {
if (dst.isMem()) dst.as<Mem>().setSize(overrideMemSize);
if (src.isMem()) src.as<Mem>().setSize(overrideMemSize);
}
emitter->setInlineComment(comment);
return emitter->emit(instId, dst, src);
}
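// Emits a move that can also convert between `srcTypeId` and `dstTypeId` when
// they differ - sign/zero extension of integers, float <-> double conversion,
// and GP <-> MMX/mask/vector transfers.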
ASMJIT_FAVOR_SIZE Error X86Internal::emitArgMove(Emitter* emitter,
const Reg& dst_, uint32_t dstTypeId,
const Operand_& src_, uint32_t srcTypeId, bool avxEnabled, const char* comment) {
// Deduce optional `dstTypeId`, which may be `Type::kIdVoid` in some cases.
if (!dstTypeId)
dstTypeId = opData.archRegs.regTypeToTypeId[dst_.type()];
// Invalid or abstract TypeIds are not allowed.
ASMJIT_ASSERT(Type::isValid(dstTypeId) && !Type::isAbstract(dstTypeId));
ASMJIT_ASSERT(Type::isValid(srcTypeId) && !Type::isAbstract(srcTypeId));
Reg dst(dst_);
Operand src(src_);
uint32_t dstSize = Type::sizeOf(dstTypeId);
uint32_t srcSize = Type::sizeOf(srcTypeId);
uint32_t instId = Inst::kIdNone;
// Not a real loop, just 'break' is nicer than 'goto'.
for (;;) {
if (Type::isInt(dstTypeId)) {
if (Type::isInt(srcTypeId)) {
instId = Inst::kIdMovsx;
uint32_t typeOp = (dstTypeId << 8) | srcTypeId;
// Sign extend by using 'movsx'.
if (typeOp == ((Type::kIdI16 << 8) | Type::kIdI8 ) ||
typeOp == ((Type::kIdI32 << 8) | Type::kIdI8 ) ||
typeOp == ((Type::kIdI32 << 8) | Type::kIdI16) ||
typeOp == ((Type::kIdI64 << 8) | Type::kIdI8 ) ||
typeOp == ((Type::kIdI64 << 8) | Type::kIdI16))
break;
// Sign extend by using 'movsxd'.
instId = Inst::kIdMovsxd;
if (typeOp == ((Type::kIdI64 << 8) | Type::kIdI32))
break;
}
if (Type::isInt(srcTypeId) || src_.isMem()) {
// Zero extend by using 'movzx' or 'mov'.
if (dstSize <= 4 && srcSize < 4) {
instId = Inst::kIdMovzx;
dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
}
else {
// We should have caught all possibilities where `srcSize` is less
// than 4, so we don't have to worry about 'movzx' anymore. Minimum
// size is enough to determine if we want 32-bit or 64-bit move.
instId = Inst::kIdMov;
srcSize = Support::min(srcSize, dstSize);
dst.setSignature(srcSize == 4 ? Reg::signatureOfT<Reg::kTypeGpd>()
: Reg::signatureOfT<Reg::kTypeGpq>());
if (src.isReg())
src.setSignature(dst.signature());
}
break;
}
// NOTE: The previous branch caught all memory sources, from here it's
// always register to register conversion, so catch the remaining cases.
srcSize = Support::min(srcSize, dstSize);
if (Type::isMmx(srcTypeId)) {
// 64-bit move.
instId = Inst::kIdMovq;
if (srcSize == 8)
break;
// 32-bit move.
instId = Inst::kIdMovd;
dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
break;
}
if (Type::isMask(srcTypeId)) {
instId = x86KmovFromSize(srcSize);
dst.setSignature(srcSize <= 4 ? Reg::signatureOfT<Reg::kTypeGpd>()
: Reg::signatureOfT<Reg::kTypeGpq>());
break;
}
if (Type::isVec(srcTypeId)) {
// 64-bit move.
instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
if (srcSize == 8)
break;
// 32-bit move.
instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
break;
}
}
if (Type::isMmx(dstTypeId)) {
instId = Inst::kIdMovq;
srcSize = Support::min(srcSize, dstSize);
if (Type::isInt(srcTypeId) || src.isMem()) {
// 64-bit move.
if (srcSize == 8)
break;
// 32-bit move.
instId = Inst::kIdMovd;
if (src.isReg())
src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
break;
}
if (Type::isMmx(srcTypeId))
break;
// This will hurt if `avxEnabled` - 'movdq2q' has no AVX (VEX) form, so
// mixing it with AVX code may incur an SSE/AVX transition penalty.
instId = Inst::kIdMovdq2q;
if (Type::isVec(srcTypeId))
break;
}
if (Type::isMask(dstTypeId)) {
srcSize = Support::min(srcSize, dstSize);
if (Type::isInt(srcTypeId) || Type::isMask(srcTypeId) || src.isMem()) {
instId = x86KmovFromSize(srcSize);
if (Reg::isGp(src) && srcSize <= 4)
src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
break;
}
}
if (Type::isVec(dstTypeId)) {
// By default set destination to XMM, will be set to YMM|ZMM if needed.
dst.setSignature(Reg::signatureOfT<Reg::kTypeXmm>());
// This will hurt if `avxEnabled` - 'movq2dq' has no AVX (VEX) form, so
// mixing it with AVX code may incur an SSE/AVX transition penalty.
if (Reg::isMm(src)) {
// 64-bit move.
instId = Inst::kIdMovq2dq;
break;
}
// Argument conversion.
uint32_t dstElement = Type::baseOf(dstTypeId);
uint32_t srcElement = Type::baseOf(srcTypeId);
if (dstElement == Type::kIdF32 && srcElement == Type::kIdF64) {
srcSize = Support::min(dstSize * 2, srcSize);
dstSize = srcSize / 2;
if (srcSize <= 8)
instId = avxEnabled ? Inst::kIdVcvtss2sd : Inst::kIdCvtss2sd;
else
instId = avxEnabled ? Inst::kIdVcvtps2pd : Inst::kIdCvtps2pd;
if (dstSize == 32)
dst.setSignature(Reg::signatureOfT<Reg::kTypeYmm>());
if (src.isReg())
src.setSignature(Reg::signatureOfVecBySize(srcSize));
break;
}
if (dstElement == Type::kIdF64 && srcElement == Type::kIdF32) {
srcSize = Support::min(dstSize, srcSize * 2) / 2;
dstSize = srcSize * 2;
if (srcSize <= 4)
instId = avxEnabled ? Inst::kIdVcvtsd2ss : Inst::kIdCvtsd2ss;
else
instId = avxEnabled ? Inst::kIdVcvtpd2ps : Inst::kIdCvtpd2ps;
dst.setSignature(Reg::signatureOfVecBySize(dstSize));
if (src.isReg() && srcSize >= 32)
src.setSignature(Reg::signatureOfT<Reg::kTypeYmm>());
break;
}
srcSize = Support::min(srcSize, dstSize);
if (Reg::isGp(src) || src.isMem()) {
// 32-bit move.
if (srcSize <= 4) {
instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
if (src.isReg())
src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
break;
}
// 64-bit move.
if (srcSize == 8) {
instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
break;
}
}
if (Reg::isVec(src) || src.isMem()) {
instId = avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
if (src.isMem() && srcSize < emitter->environment().stackAlignment())
instId = avxEnabled ? Inst::kIdVmovups : Inst::kIdMovups;
uint32_t signature = Reg::signatureOfVecBySize(srcSize);
dst.setSignature(signature);
if (src.isReg())
src.setSignature(signature);
break;
}
}
return DebugUtils::errored(kErrorInvalidState);
}
if (src.isMem())
src.as<Mem>().setSize(srcSize);
emitter->setInlineComment(comment);
return emitter->emit(instId, dst, src);
}
// ============================================================================
// [asmjit::X86Internal - Emit Prolog & Epilog]
// ============================================================================
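// Selects the register prototype, the move instruction, and the slot size used
// to save/restore one register of the given non-GP `group` (vector, MMX, or K).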
static ASMJIT_INLINE void X86Internal_setupSaveRestoreInfo(uint32_t group, const FuncFrame& frame, Reg& xReg, uint32_t& xInst, uint32_t& xSize) noexcept {
switch (group) {
case Reg::kGroupVec:
xReg = xmm(0);
xInst = x86GetXmmMovInst(frame);
xSize = xReg.size();
break;
case Reg::kGroupMm:
xReg = mm(0);
xInst = Inst::kIdMovq;
xSize = xReg.size();
break;
case Reg::kGroupKReg:
xReg = k(0);
xInst = Inst::kIdKmovq;
xSize = xReg.size();
break;
}
}
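// For a frame that preserves the frame pointer, saves RBX and R12 (assuming
// both are preserved by the calling convention and marked dirty), and needs a
// 32-byte stack adjustment, the emitted prolog would look roughly like this
// hedged sketch:
//
//   push rbp
//   mov  rbp, rsp
//   push rbx
//   push r12
//   sub  rsp, 32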
ASMJIT_FAVOR_SIZE Error X86Internal::emitProlog(Emitter* emitter, const FuncFrame& frame) {
uint32_t gpSaved = frame.savedRegs(Reg::kGroupGp);
Gp zsp = emitter->zsp(); // ESP|RSP register.
Gp zbp = emitter->zbp(); // EBP|RBP register.
Gp gpReg = zsp; // General purpose register (temporary).
Gp saReg = zsp; // Stack-arguments base pointer.
// Emit: 'push zbp'
// 'mov zbp, zsp'.
if (frame.hasPreservedFP()) {
gpSaved &= ~Support::bitMask(Gp::kIdBp);
ASMJIT_PROPAGATE(emitter->push(zbp));
ASMJIT_PROPAGATE(emitter->mov(zbp, zsp));
}
// Emit: 'push gp' sequence.
{
Support::BitWordIterator<uint32_t> it(gpSaved);
while (it.hasNext()) {
gpReg.setId(it.next());
ASMJIT_PROPAGATE(emitter->push(gpReg));
}
}
// Emit: 'mov saReg, zsp'.
uint32_t saRegId = frame.saRegId();
if (saRegId != BaseReg::kIdBad && saRegId != Gp::kIdSp) {
saReg.setId(saRegId);
if (frame.hasPreservedFP()) {
if (saRegId != Gp::kIdBp)
ASMJIT_PROPAGATE(emitter->mov(saReg, zbp));
}
else {
ASMJIT_PROPAGATE(emitter->mov(saReg, zsp));
}
}
// Emit: 'and zsp, StackAlignment'.
if (frame.hasDynamicAlignment()) {
ASMJIT_PROPAGATE(emitter->and_(zsp, -int32_t(frame.finalStackAlignment())));
}
// Emit: 'sub zsp, StackAdjustment'.
if (frame.hasStackAdjustment()) {
ASMJIT_PROPAGATE(emitter->sub(zsp, frame.stackAdjustment()));
}
// Emit: 'mov [zsp + DAOffset], saReg'.
if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
Mem saMem = ptr(zsp, int32_t(frame.daOffset()));
ASMJIT_PROPAGATE(emitter->mov(saMem, saReg));
}
// Emit 'movxxx [zsp + X], {[x|y|z]mm, k}'.
{
Reg xReg;
Mem xBase = ptr(zsp, int32_t(frame.nonGpSaveOffset()));
uint32_t xInst;
uint32_t xSize;
for (uint32_t group = 1; group < BaseReg::kGroupVirt; group++) {
Support::BitWordIterator<uint32_t> it(frame.savedRegs(group));
if (it.hasNext()) {
X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize);
do {
xReg.setId(it.next());
ASMJIT_PROPAGATE(emitter->emit(xInst, xBase, xReg));
xBase.addOffsetLo32(int32_t(xSize));
} while (it.hasNext());
}
}
}
return kErrorOk;
}
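// The matching epilog for the prolog sketched above (gpSaveSize = 24 with RBP
// included, registerSize = 8) would be roughly:
//
//   lea  rsp, [rbp - 16]
//   pop  r12
//   pop  rbx
//   pop  rbp
//   ret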
ASMJIT_FAVOR_SIZE Error X86Internal::emitEpilog(Emitter* emitter, const FuncFrame& frame) {
uint32_t i;
uint32_t regId;
uint32_t registerSize = emitter->registerSize();
uint32_t gpSaved = frame.savedRegs(Reg::kGroupGp);
Gp zsp = emitter->zsp(); // ESP|RSP register.
Gp zbp = emitter->zbp(); // EBP|RBP register.
Gp gpReg = emitter->zsp(); // General purpose register (temporary).
// Don't emit 'pop zbp' in the pop sequence, this case is handled separately.
if (frame.hasPreservedFP())
gpSaved &= ~Support::bitMask(Gp::kIdBp);
// Emit 'movxxx {[x|y|z]mm, k}, [zsp + X]'.
{
Reg xReg;
Mem xBase = ptr(zsp, int32_t(frame.nonGpSaveOffset()));
uint32_t xInst;
uint32_t xSize;
for (uint32_t group = 1; group < BaseReg::kGroupVirt; group++) {
Support::BitWordIterator<uint32_t> it(frame.savedRegs(group));
if (it.hasNext()) {
X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize);
do {
xReg.setId(it.next());
ASMJIT_PROPAGATE(emitter->emit(xInst, xReg, xBase));
xBase.addOffsetLo32(int32_t(xSize));
} while (it.hasNext());
}
}
}
// Emit 'emms' and/or 'vzeroupper'.
if (frame.hasMmxCleanup()) ASMJIT_PROPAGATE(emitter->emms());
if (frame.hasAvxCleanup()) ASMJIT_PROPAGATE(emitter->vzeroupper());
if (frame.hasPreservedFP()) {
// Emit 'mov zsp, zbp' or 'lea zsp, [zbp - x]'
int32_t count = int32_t(frame.gpSaveSize() - registerSize);
if (!count)
ASMJIT_PROPAGATE(emitter->mov(zsp, zbp));
else
ASMJIT_PROPAGATE(emitter->lea(zsp, ptr(zbp, -count)));
}
else {
if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
// Emit 'mov zsp, [zsp + DsaSlot]'.
Mem saMem = ptr(zsp, int32_t(frame.daOffset()));
ASMJIT_PROPAGATE(emitter->mov(zsp, saMem));
}
else if (frame.hasStackAdjustment()) {
// Emit 'add zsp, StackAdjustment'.
ASMJIT_PROPAGATE(emitter->add(zsp, int32_t(frame.stackAdjustment())));
}
}
// Emit 'pop gp' sequence.
if (gpSaved) {
i = gpSaved;
regId = 16;
do {
regId--;
if (i & 0x8000) {
gpReg.setId(regId);
ASMJIT_PROPAGATE(emitter->pop(gpReg));
}
i <<= 1;
} while (regId != 0);
}
// Emit 'pop zbp'.
if (frame.hasPreservedFP())
ASMJIT_PROPAGATE(emitter->pop(zbp));
// Emit 'ret' or 'ret x'.
if (frame.hasCalleeStackCleanup())
ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet, int(frame.calleeStackCleanup())));
else
ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet));
return kErrorOk;
}
// ============================================================================
// [asmjit::X86Internal - Emit Arguments Assignment]
// ============================================================================
#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT
static void dumpFuncValue(String& sb, uint32_t arch, const FuncValue& value) noexcept {
Formatter::formatTypeId(sb, value.typeId());
sb.append('@');
if (value.isIndirect())
sb.append('[');
if (value.isReg())
Formatter::formatRegister(sb, 0, nullptr, arch, value.regType(), value.regId());
else if (value.isStack())
sb.appendFormat("[%d]", value.stackOffset());
else
sb.append("<none>");
if (value.isIndirect())
sb.append(']');
}
static void dumpAssignment(String& sb, const X86FuncArgsContext& ctx) noexcept {
typedef X86FuncArgsContext::Var Var;
uint32_t arch = ctx.arch();
uint32_t varCount = ctx.varCount();
for (uint32_t i = 0; i < varCount; i++) {
const Var& var = ctx.var(i);
const FuncValue& dst = var.out;
const FuncValue& cur = var.cur;
sb.appendFormat("Var%u: ", i);
dumpFuncValue(sb, arch, dst);
sb.append(" <- ");
dumpFuncValue(sb, arch, cur);
if (var.isDone())
sb.append(" {Done}");
sb.append('\n');
}
}
#endif
ASMJIT_FAVOR_SIZE Error X86Internal::emitArgsAssignment(Emitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args) {
typedef X86FuncArgsContext::Var Var;
typedef X86FuncArgsContext::WorkData WorkData;
enum WorkFlags : uint32_t {
kWorkNone = 0x00,
kWorkDidSome = 0x01,
kWorkPending = 0x02,
kWorkPostponed = 0x04
};
X86FuncArgsContext ctx;
ASMJIT_PROPAGATE(ctx.initWorkData(frame, args));
#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT
{
String sb;
dumpAssignment(sb, ctx);
printf("%s\n", sb.data());
}
#endif
uint32_t arch = ctx.arch();
uint32_t varCount = ctx._varCount;
WorkData* workData = ctx._workData;
// Use AVX if it's enabled.
bool avxEnabled = frame.isAvxEnabled();
uint32_t saVarId = ctx._saVarId;
uint32_t saRegId = Gp::kIdSp;
if (frame.hasDynamicAlignment()) {
if (frame.hasPreservedFP())
saRegId = Gp::kIdBp;
else
saRegId = saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId();
}
RegInfo gpRegInfo = emitter->_gpRegInfo;
// --------------------------------------------------------------------------
// Register to stack and stack to stack moves must come first, as this is
// when we have the greatest chance of having as many unassigned registers
// as possible.
// --------------------------------------------------------------------------
if (ctx._stackDstMask) {
// Base address of all arguments passed by stack.
Mem baseArgPtr = ptr(emitter->gpz(saRegId), int32_t(frame.saOffset(saRegId)));
Mem baseStackPtr = ptr(emitter->gpz(Gp::kIdSp), int32_t(0));
for (uint32_t varId = 0; varId < varCount; varId++) {
Var& var = ctx._vars[varId];
if (!var.out.isStack())
continue;
FuncValue& cur = var.cur;
FuncValue& out = var.out;
ASMJIT_ASSERT(cur.isReg() || cur.isStack());
Reg reg;
Mem dstStackPtr = baseStackPtr.cloneAdjusted(out.stackOffset());
Mem srcStackPtr = baseArgPtr.cloneAdjusted(cur.stackOffset());
if (cur.isIndirect()) {
if (cur.isStack()) {
// TODO: Indirect stack.
return DebugUtils::errored(kErrorInvalidAssignment);
}
else {
srcStackPtr = ptr(Gp(gpRegInfo.signature(), cur.regId()));
}
}
if (cur.isReg() && !cur.isIndirect()) {
WorkData& wd = workData[Reg::groupOf(cur.regType())];
uint32_t rId = cur.regId();
reg.setSignatureAndId(Reg::signatureOf(cur.regType()), rId);
wd.unassign(varId, rId);
}
else {
// Stack to stack move through a temporary register - since both source and
// destination are in memory we can decide which register to use. In general
// we follow the rule that IntToInt moves use GP regs (with the possibility
// to sign- or zero-extend), and all other moves use either GP or VEC regs
// depending on the size of the move.
RegInfo rInfo = x86GetRegForMemToMemMove(arch, out.typeId(), cur.typeId());
if (ASMJIT_UNLIKELY(!rInfo.isValid()))
return DebugUtils::errored(kErrorInvalidState);
WorkData& wd = workData[rInfo.group()];
uint32_t availableRegs = wd.availableRegs();
if (ASMJIT_UNLIKELY(!availableRegs))
return DebugUtils::errored(kErrorInvalidState);
uint32_t rId = Support::ctz(availableRegs);
reg.setSignatureAndId(rInfo.signature(), rId);
ASMJIT_PROPAGATE(emitArgMove(emitter, reg, out.typeId(), srcStackPtr, cur.typeId(), avxEnabled));
}
if (cur.isIndirect() && cur.isReg())
workData[BaseReg::kGroupGp].unassign(varId, cur.regId());
// Register to stack move.
ASMJIT_PROPAGATE(emitRegMove(emitter, dstStackPtr, reg, cur.typeId(), avxEnabled));
var.markDone();
}
}
// --------------------------------------------------------------------------
// Shuffle all registers that are currently assigned according to the target
// assignment.
// --------------------------------------------------------------------------
uint32_t workFlags = kWorkNone;
for (;;) {
for (uint32_t varId = 0; varId < varCount; varId++) {
Var& var = ctx._vars[varId];
if (var.isDone() || !var.cur.isReg())
continue;
FuncValue& cur = var.cur;
FuncValue& out = var.out;
uint32_t curGroup = Reg::groupOf(cur.regType());
uint32_t outGroup = Reg::groupOf(out.regType());
uint32_t curId = cur.regId();
uint32_t outId = out.regId();
if (curGroup != outGroup) {
// TODO: Conversion is not supported.
return DebugUtils::errored(kErrorInvalidAssignment);
}
else {
WorkData& wd = workData[outGroup];
if (!wd.isAssigned(outId)) {
EmitMove:
ASMJIT_PROPAGATE(
emitArgMove(emitter,
Reg::fromTypeAndId(out.regType(), outId), out.typeId(),
Reg::fromTypeAndId(cur.regType(), curId), cur.typeId(), avxEnabled));
wd.reassign(varId, outId, curId);
cur.initReg(out.regType(), outId, out.typeId());
if (outId == out.regId())
var.markDone();
workFlags |= kWorkDidSome | kWorkPending;
}
else {
uint32_t altId = wd._physToVarId[outId];
Var& altVar = ctx._vars[altId];
if (!altVar.out.isInitialized() || (altVar.out.isReg() && altVar.out.regId() == curId)) {
// Swap operation is possible only between two GP registers.
if (curGroup == Reg::kGroupGp) {
uint32_t highestType = Support::max(cur.regType(), altVar.cur.regType());
uint32_t signature = highestType == Reg::kTypeGpq ? Reg::signatureOfT<Reg::kTypeGpq>()
: Reg::signatureOfT<Reg::kTypeGpd>();
ASMJIT_PROPAGATE(emitter->emit(Inst::kIdXchg, Reg(signature, outId), Reg(signature, curId)));
wd.swap(varId, curId, altId, outId);
cur.setRegId(outId);
var.markDone();
altVar.cur.setRegId(curId);
if (altVar.out.isInitialized())
altVar.markDone();
workFlags |= kWorkDidSome;
}
else {
// If there is a scratch register it can be used to perform the swap.
uint32_t availableRegs = wd.availableRegs();
if (availableRegs) {
uint32_t inOutRegs = wd.dstRegs();
if (availableRegs & ~inOutRegs)
availableRegs &= ~inOutRegs;
outId = Support::ctz(availableRegs);
goto EmitMove;
}
else {
workFlags |= kWorkPending;
}
}
}
else {
workFlags |= kWorkPending;
}
}
}
}
if (!(workFlags & kWorkPending))
break;
// If we did nothing twice it means that something is really broken.
if ((workFlags & (kWorkDidSome | kWorkPostponed)) == kWorkPostponed)
return DebugUtils::errored(kErrorInvalidState);
workFlags = (workFlags & kWorkDidSome) ? kWorkNone : kWorkPostponed;
}
// --------------------------------------------------------------------------
// Load arguments passed by stack into registers. This is pretty simple and
// it never requires multiple iterations like the previous phase.
// --------------------------------------------------------------------------
if (ctx._hasStackSrc) {
uint32_t iterCount = 1;
if (frame.hasDynamicAlignment() && !frame.hasPreservedFP())
saRegId = saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId();
// Base address of all arguments passed by stack.
Mem baseArgPtr = ptr(emitter->gpz(saRegId), int32_t(frame.saOffset(saRegId)));
for (uint32_t iter = 0; iter < iterCount; iter++) {
for (uint32_t varId = 0; varId < varCount; varId++) {
Var& var = ctx._vars[varId];
if (var.isDone())
continue;
if (var.cur.isStack()) {
ASMJIT_ASSERT(var.out.isReg());
uint32_t outId = var.out.regId();
uint32_t outType = var.out.regType();
uint32_t group = Reg::groupOf(outType);
WorkData& wd = ctx._workData[group];
if (outId == saRegId && group == BaseReg::kGroupGp) {
// This register will be processed last as we still need `saRegId`.
if (iterCount == 1) {
iterCount++;
continue;
}
wd.unassign(wd._physToVarId[outId], outId);
}
Reg dstReg = Reg::fromTypeAndId(outType, outId);
Mem srcMem = baseArgPtr.cloneAdjusted(var.cur.stackOffset());
ASMJIT_PROPAGATE(
emitArgMove(emitter,
dstReg, var.out.typeId(),
srcMem, var.cur.typeId(), avxEnabled));
wd.assign(varId, outId);
var.cur.initReg(outType, outId, var.cur.typeId(), FuncValue::kFlagIsDone);
}
}
}
}
return kErrorOk;
}
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_BUILD_X86