/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   | Copyright (c) 2018 Intel Corporation                                 |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#pragma once
extern "C" {
#include <xed-interface.h>
}
#include <tbb/concurrent_unordered_map.h>
/*
 * A macro assembler for x64, based on the Intel XED library, that strives
 * for low coupling to the runtime environment and ease of extensibility.
 */
namespace HPHP::jit {
struct XedInit {
XedInit() {
xed_tables_init();
}
};
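/*
 * XED requires xed_tables_init() to run once per process before any encode
 * request is built. A minimal sketch of how this guard can be used (the
 * variable name is illustrative, not part of this header):
 *
 *   static XedInit s_xedInit;  // runs xed_tables_init() at static-init time
 *   XedAssembler a { cb };     // safe to encode from here on
 */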
///////////////////////////////////////////////////////////////////////////////
struct XedAssembler final : public X64AssemblerBase {
private:
static constexpr xed_state_t kXedState = {
XED_MACHINE_MODE_LONG_64,
XED_ADDRESS_WIDTH_64b
};
CodeAddress dest() const {
codeBlock.assertCanEmit(XED_MAX_INSTRUCTION_BYTES);
return codeBlock.toDestAddress(codeBlock.frontier());
}
static constexpr auto nullrip = RIPRelativeRef(DispRIP(0));
public:
explicit XedAssembler(CodeBlock& cb) : X64AssemblerBase(cb) {}
XedAssembler(const XedAssembler&) = delete;
XedAssembler& operator=(const XedAssembler&) = delete;
  /*
   * The following section defines the main interface for emitting
   * x64.
   *
   * Simple Examples:
   *
   *   a.  movq   (rax, rbx);       // order is AT&T: src, dest
   *   a.  loadq  (*rax, rbx);      // loads from *rax
   *   a.  loadq  (rax[0], rbx);    // also loads from *rax
   *   a.  storeq (rcx, rax[0xc]);  // store to rax + 0xc
   *   a.  addq   (0x1, rbx);       // increment rbx
   *
   * Addressing with index registers:
   *
   *   a.  movl   (index, ecx);
   *   a.  loadq  (*rax, rbx);
   *   a.  storeq (rbx, rbx[rcx*8]);
   *   a.  call   (rax);            // indirect call
   *
   */
#define BYTE_LOAD_OP(name, instr) \
void name##b(MemoryRef m, Reg8 r) { xedInstrMR(instr, m, r); }
#define LOAD_OP(name, instr) \
void name##q(MemoryRef m, Reg64 r) { xedInstrMR(instr, m, r); } \
void name##l(MemoryRef m, Reg32 r) { xedInstrMR(instr, m, r); } \
void name##w(MemoryRef m, Reg16 r) { xedInstrMR(instr, m, r); } \
void name##q(RIPRelativeRef m, Reg64 r) { xedInstrMR(instr, m, r); }\
BYTE_LOAD_OP(name, instr)
#define BYTE_STORE_OP(name, instr) \
void name##b(Reg8 r, MemoryRef m) { xedInstrRM(instr, r, m); } \
void name##b(Immed i, MemoryRef m) { xedInstrIM(instr, i, m, \
sz::byte); }
#define STORE_OP(name, instr) \
void name##w(Immed i, MemoryRef m) { \
xedInstrIM(instr, i, m, IMMPROP(sz::word, \
sz::word | sz::byte), sz::word); \
} \
void name##l(Immed i, MemoryRef m) { \
xedInstrIM(instr, i, m, IMMPROP(sz::dword, \
sz::dword | sz::byte), sz::dword);\
} \
void name##w(Reg16 r, MemoryRef m) { xedInstrRM(instr, r, m); } \
void name##l(Reg32 r, MemoryRef m) { xedInstrRM(instr, r, m); } \
void name##q(Reg64 r, MemoryRef m) { xedInstrRM(instr, r, m); } \
BYTE_STORE_OP(name, instr)
#define BYTE_REG_OP(name, instr) \
void name##b(Reg8 r1, Reg8 r2) { xedInstrRR(instr, r1, r2);} \
void name##b(Immed i, Reg8 r) { xedInstrIR(instr, i, r); }
#define REG_OP(name, instr) \
void name##q(Reg64 r1, Reg64 r2) { xedInstrRR(instr, r1, r2); } \
void name##l(Reg32 r1, Reg32 r2) { xedInstrRR(instr, r1, r2); } \
void name##w(Reg16 r1, Reg16 r2) { xedInstrRR(instr, r1, r2); } \
void name##l(Immed i, Reg32 r) { \
xedInstrIR(instr, i, r, IMMPROP(sz::dword, \
sz::dword | sz::byte)); \
} \
void name##w(Immed i, Reg16 r) { \
xedInstrIR(instr, i, r, IMMPROP(sz::word, \
sz::word | sz::byte)); \
} \
BYTE_REG_OP(name, instr)
#define IMM64_STORE_OP(name, instr) \
void name##q(Immed i, MemoryRef m) { \
xedInstrIM(instr, i, m, IMMPROP(sz::dword, \
sz::dword | sz::byte), sz::qword);\
}
#define IMM64R_OP(name, instr) \
void name##q(Immed imm, Reg64 r) { \
always_assert(imm.fits(sz::dword)); \
xedInstrIR(instr, imm, r, IMMPROP(sz::dword, \
sz::dword | sz::byte)); \
}
#define FULL_OP(name, instr) \
LOAD_OP(name, instr) \
STORE_OP(name, instr) \
REG_OP(name, instr) \
IMM64_STORE_OP(name, instr) \
IMM64R_OP(name, instr)
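  // As an illustration (not an exhaustive expansion), FULL_OP(add,
  // XED_ICLASS_ADD) below generates, among others:
  //
  //   void addq(MemoryRef m, Reg64 r);  // LOAD_OP:   add m into r
  //   void addq(Reg64 r, MemoryRef m);  // STORE_OP:  add r into m
  //   void addq(Reg64 r1, Reg64 r2);    // REG_OP:    add r1 into r2
  //   void addq(Immed i, Reg64 r);      // IMM64R_OP: add i into r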
// We rename x64's mov to store and load for improved code
// readability.
#define IMMPROP(size, allsizes) size
LOAD_OP (load, XED_ICLASS_MOV)
STORE_OP (store,XED_ICLASS_MOV)
IMM64_STORE_OP (store,XED_ICLASS_MOV)
REG_OP (mov, XED_ICLASS_MOV)
FULL_OP (test, XED_ICLASS_TEST)
#undef IMMPROP
#define IMMPROP(size, allsizes) allsizes
FULL_OP(add, XED_ICLASS_ADD)
FULL_OP(xor, XED_ICLASS_XOR)
FULL_OP(sub, XED_ICLASS_SUB)
FULL_OP(and, XED_ICLASS_AND)
FULL_OP(or, XED_ICLASS_OR)
FULL_OP(cmp, XED_ICLASS_CMP)
FULL_OP(sbb, XED_ICLASS_SBB)
#undef IMMPROP
#undef IMM64R_OP
#undef FULL_OP
#undef REG_OP
#undef STORE_OP
#undef LOAD_OP
#undef BYTE_LOAD_OP
#undef BYTE_STORE_OP
#undef BYTE_REG_OP
#undef IMM64_STORE_OP
// 64-bit immediates work with mov to a register.
void movq(Immed64 imm, Reg64 r) { xedInstrIR(XED_ICLASS_MOV, imm, r,
sz::qword | sz::dword); }
// movzbx is a special snowflake. We don't have movzbq because it behaves
// exactly the same as movzbl but takes an extra byte.
void loadzbl(MemoryRef m, Reg32 r) { xedInstrMR(XED_ICLASS_MOVZX,
m, r, sz::byte); }
void loadzwl(MemoryRef m, Reg32 r) { xedInstrMR(XED_ICLASS_MOVZX,
m, r, sz::word); }
void movzbl(Reg8 src, Reg32 dest) { xedInstrRR(XED_ICLASS_MOVZX,
src, dest); }
void movsbl(Reg8 src, Reg32 dest) { xedInstrRR(XED_ICLASS_MOVSX,
src, dest); }
void movzwl(Reg16 src, Reg32 dest) { xedInstrRR(XED_ICLASS_MOVZX,
src, dest); }
void loadsbq(MemoryRef m, Reg64 r) { xedInstrMR(XED_ICLASS_MOVSX,
m, r, sz::byte); }
void movsbq(Reg8 src, Reg64 dest) { xedInstrRR(XED_ICLASS_MOVSX,
src, dest); }
void crc32q(Reg64 src, Reg64 dest) { xedInstrRR(XED_ICLASS_CRC32,
src, dest); }
void lea(MemoryRef p, Reg64 reg) { xedInstrMR(XED_ICLASS_LEA, p, reg); }
void lea(RIPRelativeRef p, Reg64 reg) { xedInstrMR(XED_ICLASS_LEA, p, reg); }
void xchgq(Reg64 r1, Reg64 r2) { xedInstrRR(XED_ICLASS_XCHG, r1, r2); }
void xchgl(Reg32 r1, Reg32 r2) { xedInstrRR(XED_ICLASS_XCHG, r1, r2); }
void xchgb(Reg8 r1, Reg8 r2) { xedInstrRR(XED_ICLASS_XCHG, r1, r2); }
void imul(Reg64 r1, Reg64 r2) { xedInstrRR(XED_ICLASS_IMUL, r1, r2); }
void push(Reg64 r) { xedInstrR(XED_ICLASS_PUSH, r); }
void pushl(Reg32 r) { xedInstrR(XED_ICLASS_PUSH, r); }
void pop (Reg64 r) { xedInstrR(XED_ICLASS_POP, r); }
void idiv(Reg64 r) { xedInstrR(XED_ICLASS_IDIV, r); }
void incq(Reg64 r) { xedInstrR(XED_ICLASS_INC, r); }
void incl(Reg32 r) { xedInstrR(XED_ICLASS_INC, r); }
void incw(Reg16 r) { xedInstrR(XED_ICLASS_INC, r); }
void decq(Reg64 r) { xedInstrR(XED_ICLASS_DEC, r); }
void decl(Reg32 r) { xedInstrR(XED_ICLASS_DEC, r); }
void decw(Reg16 r) { xedInstrR(XED_ICLASS_DEC, r); }
void notb(Reg8 r) { xedInstrR(XED_ICLASS_NOT, r); }
void not(Reg64 r) { xedInstrR(XED_ICLASS_NOT, r); }
void neg(Reg64 r) { xedInstrR(XED_ICLASS_NEG, r); }
void negb(Reg8 r) { xedInstrR(XED_ICLASS_NEG, r); }
void ret() { xedInstr(XED_ICLASS_RET_NEAR); }
  void ret(Immed i)    { xedInstrI(XED_ICLASS_RET_NEAR, i,
                                   sz::word); }
void cqo() { xedInstr(XED_ICLASS_CQO); }
void nop() { xedInstr(XED_ICLASS_NOP, sz::byte); }
void int3() { xedInstr(XED_ICLASS_INT3, sz::byte); }
void ud2() { xedInstr(XED_ICLASS_UD2, sz::byte); }
void pushf() { xedInstr(XED_ICLASS_PUSHF, sz::word); }
void popf() { xedInstr(XED_ICLASS_POPF, sz::word); }
  // The bare lock prefix is unsupported; use the locked instruction
  // variants (e.g. decqlock) instead.
  void lock()          { always_assert(false); }
void push(MemoryRef m) { xedInstrM(XED_ICLASS_PUSH, m); }
void pop (MemoryRef m) { xedInstrM(XED_ICLASS_POP, m); }
void prefetch(MemoryRef m) { xedInstrM(XED_ICLASS_PREFETCHT1, m); }
void incq(MemoryRef m) { xedInstrM(XED_ICLASS_INC, m); }
void incl(MemoryRef m) { xedInstrM(XED_ICLASS_INC, m, sz::dword); }
void incw(MemoryRef m) { xedInstrM(XED_ICLASS_INC, m, sz::word); }
void decqlock(MemoryRef m) { xedInstrM(XED_ICLASS_DEC_LOCK, m); }
void decq(MemoryRef m) { xedInstrM(XED_ICLASS_DEC, m); }
void decl(MemoryRef m) { xedInstrM(XED_ICLASS_DEC, m, sz::dword); }
void decw(MemoryRef m) { xedInstrM(XED_ICLASS_DEC, m, sz::word); }
  // Special case for push(imm): pick the smallest encodable immediate.
void push(Immed64 i) {
xed_encoder_operand_t op = toXedOperand(i, sz::byte | sz::word | sz::dword);
xedEmit(XED_ICLASS_PUSH, op, op.width_bits < 32 ? 16 : 64);
}
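  // Width selection above: byte/word immediates are pushed with a 16-bit
  // effective operand size, wider ones with the default 64-bit push (the
  // dword immediate is then sign-extended by the hardware).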
void movups(RegXMM x, MemoryRef m) { xedInstrRM(XED_ICLASS_MOVUPS,
x, m, sz::qword * 2); }
void movups(MemoryRef m, RegXMM x) { xedInstrMR(XED_ICLASS_MOVUPS,
m, x, sz::qword * 2); }
void movdqu(RegXMM x, MemoryRef m) { xedInstrRM(XED_ICLASS_MOVDQU,
x, m); }
void movdqu(MemoryRef m, RegXMM x) { xedInstrMR(XED_ICLASS_MOVDQU,
m, x); }
void movdqa(RegXMM x, RegXMM y) { xedInstrRR(XED_ICLASS_MOVDQA,
y, x); }
void movdqa(RegXMM x, MemoryRef m) { xedInstrRM(XED_ICLASS_MOVDQA,
x, m); }
void movdqa(MemoryRef m, RegXMM x) { xedInstrMR(XED_ICLASS_MOVDQA,
m, x); }
void movsd (RegXMM x, RegXMM y) { xedInstrRR(XED_ICLASS_MOVSD_XMM,
y, x); }
void movsd (RegXMM x, MemoryRef m) { xedInstrRM(XED_ICLASS_MOVSD_XMM,
x, m); }
void movsd (MemoryRef m, RegXMM x) { xedInstrMR(XED_ICLASS_MOVSD_XMM,
m, x); }
void movsd (RIPRelativeRef m, RegXMM x) { xedInstrMR(XED_ICLASS_MOVSD_XMM,
m, x); }
void lddqu (MemoryRef m, RegXMM x) { xedInstrMR(XED_ICLASS_LDDQU,
m, x); }
void unpcklpd(RegXMM s, RegXMM d) { xedInstrRR(XED_ICLASS_UNPCKLPD,
d, s); }
void rorq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_ROR, i, r, sz::byte); }
void shlq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_SHL, i, r, sz::byte); }
void shrq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_SHR, i, r, sz::byte); }
void sarq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_SAR, i, r, sz::byte); }
void shll (Immed i, Reg32 r) { xedInstrIR(XED_ICLASS_SHL, i, r, sz::byte); }
void shrl (Immed i, Reg32 r) { xedInstrIR(XED_ICLASS_SHR, i, r, sz::byte); }
void shlw (Immed i, Reg16 r) { xedInstrIR(XED_ICLASS_SHL, i, r, sz::byte); }
void shrw (Immed i, Reg16 r) { xedInstrIR(XED_ICLASS_SHR, i, r, sz::byte); }
void shlq (Reg64 r) { xedInstrRR_CL(XED_ICLASS_SHL, r); }
void shrq (Reg64 r) { xedInstrRR_CL(XED_ICLASS_SHR, r); }
void sarq (Reg64 r) { xedInstrRR_CL(XED_ICLASS_SAR, r); }
void btrq (Immed i, Reg64 r) { xedInstrIR(XED_ICLASS_BTR, i, r, sz::byte); }
void roundsd (RoundDirection d, RegXMM src, RegXMM dst) {
Immed i((int)d);
xedInstrIRR(XED_ICLASS_ROUNDSD, dst, src, i, sz::byte);
}
void cmpsd(RegXMM src, RegXMM dst, ComparisonPred pred) {
Immed i((int)pred);
xedInstrIRR(XED_ICLASS_CMPSD_XMM, dst, src, i, sz::byte);
}
/*
* Control-flow directives. Primitive labeling/patching facilities
* are available, as well as slightly higher-level ones via the
* Label class.
*/
void jmp(Reg64 r) { xedInstrR(XED_ICLASS_JMP, r); }
void jmp(MemoryRef m) { xedInstrM(XED_ICLASS_JMP, m); }
void jmp(RIPRelativeRef m) { xedInstrM(XED_ICLASS_JMP, m); }
void call(Reg64 r) { xedInstrR(XED_ICLASS_CALL_NEAR, r); }
void call(MemoryRef m) { xedInstrM(XED_ICLASS_CALL_NEAR, m); }
void call(RIPRelativeRef m) { xedInstrM(XED_ICLASS_CALL_NEAR, m); }
void jmp8(CodeAddress dest) { xedInstrRelBr(XED_ICLASS_JMP,
dest, sz::byte); }
void jmp(CodeAddress dest) {
xedInstrRelBr(XED_ICLASS_JMP, dest, sz::dword);
}
void call(CodeAddress dest) {
xedInstrRelBr(XED_ICLASS_CALL_NEAR, dest, sz::dword);
}
void jcc(ConditionCode cond, CodeAddress dest) {
xedInstrRelBr(ccToXedJump(cond), dest, sz::dword);
}
void jcc8(ConditionCode cond, CodeAddress dest) {
xedInstrRelBr(ccToXedJump(cond), dest, sz::byte);
}
using X64AssemblerBase::call;
using X64AssemblerBase::jmp;
using X64AssemblerBase::jmp8;
using X64AssemblerBase::jcc;
using X64AssemblerBase::jcc8;
void setcc(int cc, Reg8 byteReg) {
xedInstrR(ccToXedSetCC(cc), byteReg);
}
void psllq(Immed i, RegXMM r) { xedInstrIR(XED_ICLASS_PSLLQ, i, r,
sz::byte); }
void psrlq(Immed i, RegXMM r) { xedInstrIR(XED_ICLASS_PSRLQ, i, r,
sz::byte); }
void movq_rx(Reg64 rsrc, RegXMM rdest) {
xedInstrRR(XED_ICLASS_MOVQ, rsrc, rdest);
}
void movq_xr(RegXMM rsrc, Reg64 rdest) {
xedInstrRR(XED_ICLASS_MOVQ, rsrc, rdest);
}
void addsd(RegXMM src, RegXMM srcdest) {
xedInstrRR(XED_ICLASS_ADDSD, srcdest, src);
}
void mulsd(RegXMM src, RegXMM srcdest) {
xedInstrRR(XED_ICLASS_MULSD, srcdest, src);
}
void subsd(RegXMM src, RegXMM srcdest) {
xedInstrRR(XED_ICLASS_SUBSD, srcdest, src);
}
void pxor(RegXMM src, RegXMM srcdest) {
xedInstrRR(XED_ICLASS_PXOR, srcdest, src);
}
void cvtsi2sd(Reg64 src, RegXMM dest) {
xedInstrRR(XED_ICLASS_CVTSI2SD, src, dest);
}
void cvtsi2sd(MemoryRef m, RegXMM dest) {
xedInstrMR(XED_ICLASS_CVTSI2SD, m, dest);
}
void ucomisd(RegXMM l, RegXMM r) {
xedInstrRR(XED_ICLASS_UCOMISD, l, r);
}
void sqrtsd(RegXMM src, RegXMM dest) {
xedInstrRR(XED_ICLASS_SQRTSD, dest, src);
}
void divsd(RegXMM src, RegXMM srcdest) {
xedInstrRR(XED_ICLASS_DIVSD, srcdest, src);
}
void cvttsd2siq(RegXMM src, Reg64 dest) {
xedInstrRR(XED_ICLASS_CVTTSD2SI, src, dest);
}
private:
  // Conversion from ConditionCode to the XED jump/setcc/cmov iclasses.
#define CC_TO_XED_ARRAY(xed_instr) { \
XED_ICLASS_##xed_instr##O, /*CC_O */ \
XED_ICLASS_##xed_instr##NO, /*CC_NO */ \
XED_ICLASS_##xed_instr##B, /*CC_B, CC_NAE */ \
XED_ICLASS_##xed_instr##NB, /*CC_AE, CC_NB, CC_NC */ \
XED_ICLASS_##xed_instr##Z, /*CC_E, CC_Z */ \
XED_ICLASS_##xed_instr##NZ, /*CC_NE, CC_NZ */ \
XED_ICLASS_##xed_instr##BE, /*CC_BE, CC_NA */ \
XED_ICLASS_##xed_instr##NBE, /*CC_A, CC_NBE */ \
XED_ICLASS_##xed_instr##S, /*CC_S */ \
XED_ICLASS_##xed_instr##NS, /*CC_NS */ \
XED_ICLASS_##xed_instr##P, /*CC_P */ \
XED_ICLASS_##xed_instr##NP, /*CC_NP */ \
XED_ICLASS_##xed_instr##L, /*CC_L, CC_NGE */ \
XED_ICLASS_##xed_instr##NL, /*CC_GE, CC_NL */ \
XED_ICLASS_##xed_instr##LE, /*CC_LE, CC_NG */ \
XED_ICLASS_##xed_instr##NLE /*CC_G, CC_NLE */ \
}
ALWAYS_INLINE
xed_iclass_enum_t ccToXedJump(ConditionCode c) {
assertx(c != CC_None);
static const xed_iclass_enum_t jumps[] = CC_TO_XED_ARRAY(J);
return jumps[(int)c];
}
ALWAYS_INLINE
xed_iclass_enum_t ccToXedSetCC(int c) {
assertx(c != -1);
static const xed_iclass_enum_t setccs[] = CC_TO_XED_ARRAY(SET);
return setccs[c];
}
ALWAYS_INLINE
xed_iclass_enum_t ccToXedCMov(ConditionCode c) {
assertx(c != CC_None);
static const xed_iclass_enum_t cmovs[] = CC_TO_XED_ARRAY(CMOV);
return cmovs[(int)c];
}
// XED emit functions
template<typename F> ALWAYS_INLINE
uint32_t xedEmitImpl(xed_iclass_enum_t instr, CodeAddress destination,
F xedFunc) {
xed_encoder_instruction_t instruction;
xed_encoder_request_t request;
uint32_t encodedSize = 0;
xed_error_enum_t xedError;
xed_bool_t convert_ok;
xedFunc(&instruction);
xed_encoder_request_zero(&request);
convert_ok = xed_convert_to_encoder_request(&request, &instruction);
always_assert(convert_ok && "Unable to convert instruction"
" to encoder request");
xedError = xed_encode(&request, destination, XED_MAX_INSTRUCTION_BYTES,
&encodedSize);
always_assert_flog(xedError == XED_ERROR_NONE,
"XED: Error when encoding {}(): {}",
xed_iclass_enum_t2str(instr),
xed_error_enum_t2str(xedError));
return encodedSize;
}
ALWAYS_INLINE
uint32_t xedEmit(xed_iclass_enum_t instr, xed_uint_t effOperandSizeBits,
CodeAddress destination = nullptr) {
auto size = xedEmitImpl(instr, destination ? destination : dest(),
[&](xed_encoder_instruction_t* i) {
xed_inst0(i, kXedState, instr,
effOperandSizeBits);
});
if (!destination) codeBlock.moveFrontier(size);
return size;
}
ALWAYS_INLINE
uint32_t xedEmit(xed_iclass_enum_t instr,
const xed_encoder_operand_t& op,
xed_uint_t effOperandSizeBits = 0,
CodeAddress destination = nullptr) {
auto size = xedEmitImpl(instr, destination ? destination : dest(),
[&](xed_encoder_instruction_t* i) {
xed_inst1(i, kXedState, instr,
effOperandSizeBits, op);
});
if (!destination) codeBlock.moveFrontier(size);
return size;
}
ALWAYS_INLINE
uint32_t xedEmit(xed_iclass_enum_t instr,
const xed_encoder_operand_t& op_1,
const xed_encoder_operand_t& op_2,
xed_uint_t effOperandSizeBits = 0,
CodeAddress destination = nullptr) {
auto size = xedEmitImpl(instr, destination ? destination : dest(),
[&](xed_encoder_instruction_t* i) {
xed_inst2(i, kXedState, instr,
effOperandSizeBits, op_1, op_2);
});
if (!destination) codeBlock.moveFrontier(size);
return size;
}
ALWAYS_INLINE
uint32_t xedEmit(xed_iclass_enum_t instr,
const xed_encoder_operand_t& op_1,
const xed_encoder_operand_t& op_2,
const xed_encoder_operand_t& op_3,
xed_uint_t effOperandSizeBits = 0,
CodeAddress destination = nullptr) {
auto size = xedEmitImpl(instr, destination ? destination : dest(),
[&](xed_encoder_instruction_t* i) {
xed_inst3(i, kXedState, instr,
effOperandSizeBits,
op_1, op_2, op_3);
});
if (!destination) codeBlock.moveFrontier(size);
return size;
}
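  /*
   * All xedEmit() overloads above share the same contract: with the
   * default destination (nullptr) they encode at the current frontier and
   * advance it; with an explicit destination they only encode, leaving
   * the frontier untouched. The latter mode is what the length probes and
   * scratch-buffer emitters below (emitInt3s, pad, the RIP-relative
   * wrappers) rely on.
   */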
public:
static constexpr auto kInt3Size = sz::byte;
static constexpr auto kUd2Size = sz::word;
void emitInt3s(int n) {
if (n == 0) return;
static auto const instr = [&]{
uint8_t int3;
xedEmit(XED_ICLASS_INT3, sz::byte,
reinterpret_cast<CodeAddress>(&int3));
return int3;
}();
for (auto i = 0; i < n; ++i) {
byte(instr);
}
}
void emitNop(int n) {
if (n == 0) return;
static const xed_iclass_enum_t nops[] = {
XED_ICLASS_INVALID,
XED_ICLASS_NOP,
XED_ICLASS_NOP2,
XED_ICLASS_NOP3,
XED_ICLASS_NOP4,
XED_ICLASS_NOP5,
XED_ICLASS_NOP6,
XED_ICLASS_NOP7,
XED_ICLASS_NOP8,
XED_ICLASS_NOP9,
};
  // While n >= 9, emit 9-byte NOPs.
while (n >= 9) {
xedInstr(XED_ICLASS_NOP9, sz::nosize);
n -= 9;
}
// Emit remaining NOPs (if any)
if (n) {
xedInstr(nops[n], sz::nosize);
}
}
void pad() {
auto remaining = available();
if (remaining == 0) return;
static auto const instrs = [&]{
struct {
uint8_t int3;
uint16_t ud2;
} data;
xedEmit(XED_ICLASS_INT3, sz::nosize,
reinterpret_cast<CodeAddress>(&data.int3));
xedEmit(XED_ICLASS_UD2, sz::nosize,
reinterpret_cast<CodeAddress>(&data.ud2));
return data;
}();
while (remaining >= kUd2Size) {
word(instrs.ud2);
remaining -= kUd2Size;
}
while (remaining >= kInt3Size) {
byte(instrs.int3);
remaining -= kInt3Size;
}
}
ALWAYS_INLINE
XedAssembler& prefix(const MemoryRef& mr) {
return *this;
}
public:
/*
* The following functions use a naming convention for an older API
* to the assembler; conditional loads and moves haven't yet been
* ported.
*/
// CMOVcc [rbase + off], rdest
inline void cload_reg64_disp_reg64(ConditionCode cc, Reg64 rbase,
int off, Reg64 rdest) {
MemoryRef m(DispReg(rbase, off));
xedInstrMR(ccToXedCMov(cc), m, rdest);
}
inline void cload_reg64_disp_reg32(ConditionCode cc, Reg64 rbase,
int off, Reg32 rdest) {
MemoryRef m(DispReg(rbase, off));
xedInstrMR(ccToXedCMov(cc), m, rdest);
}
inline void cmov_reg64_reg64(ConditionCode cc, Reg64 rsrc, Reg64 rdest) {
xedInstrRR(ccToXedCMov(cc), rsrc, rdest);
}
private:
  /*
   * The following section contains conversion methods that take a
   * Reg8/16/32/64, RegXMM, MemoryRef, or RIPRelativeRef and convert it
   * to a xed_encoder_operand_t.
   */
static constexpr int bytesToBits(int sz) {
return (sz << 3);
}
static constexpr int bitsToBytes(int bits) {
return (bits >> 3);
}
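  // XedImmValue below reinterprets an Immed/Immed64 at a chosen width so
  // that xed_imm0() sees exactly the bytes to encode; e.g.
  // XedImmValue(Immed(-1), sz::byte).uq == 0xff (only the low byte set).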
union XedImmValue {
int8_t b;
uint8_t ub;
int16_t w;
int32_t l;
int64_t q;
uint64_t uq;
template<typename immtype>
XedImmValue(const immtype& imm, int immSize) {
uq = 0;
switch (immSize) {
case sz::byte:
b = imm.b(); break;
case sz::word:
w = imm.w(); break;
case sz::dword:
l = imm.l(); break;
case sz::qword:
q = imm.q(); break;
}
}
};
xed_reg_enum_t xedFromReg(const Reg64& reg) {
return xed_reg_enum_t(int(reg) + XED_REG_RAX);
}
xed_reg_enum_t xedFromReg(const Reg32& reg) {
return xed_reg_enum_t(int(reg) + XED_REG_EAX);
}
xed_reg_enum_t xedFromReg(const Reg16& reg) {
return xed_reg_enum_t(int(reg) + XED_REG_AX);
}
xed_reg_enum_t xedFromReg(const Reg8& reg) {
auto regid = int(reg);
if ((regid & 0x80) == 0) {
return xed_reg_enum_t(regid + XED_REG_AL);
}
return xed_reg_enum_t((regid - 0x84) + XED_REG_AH);
}
xed_reg_enum_t xedFromReg(const RegXMM& reg) {
return xed_reg_enum_t(int(reg) + XED_REG_XMM0);
}
int getDisplSize(intptr_t value) {
if (value == 0) return sz::nosize;
return deltaFits(value, sz::byte) ? sz::byte : sz::dword;
}
xed_enc_displacement_t xedDispFromValue(intptr_t value, int size) {
switch (size) {
case sz::nosize: return {0, 0};
case sz::byte: return {(xed_uint64_t)safe_cast<int8_t>(value),
(xed_uint32_t)bytesToBits(size)};
default: return {(xed_uint64_t)safe_cast<int32_t>(value),
(xed_uint32_t)bytesToBits(size)};
}
}
xed_enc_displacement_t xedDispFromValue(intptr_t value) {
return xedDispFromValue(value, getDisplSize(value));
}
template<typename regtype>
xed_encoder_operand_t toXedOperand(const regtype& reg) {
return xed_reg(xedFromReg(reg));
}
xed_encoder_operand_t toXedOperand(xed_reg_enum_t reg) {
return xed_reg(reg);
}
xed_encoder_operand_t toXedOperand(const MemoryRef& m, int memSize) {
static const xed_reg_enum_t segmentRegs[] = {
XED_REG_INVALID, //Segment::DS (no segment register override)
XED_REG_FS, //Segment::FS
XED_REG_GS //Segment::GS
};
xed_reg_enum_t base = int(m.r.base) != -1 ?
xedFromReg(m.r.base) : XED_REG_INVALID;
xed_reg_enum_t index = int(m.r.index) != -1 ?
xedFromReg(m.r.index) : XED_REG_INVALID;
return xed_mem_gbisd(segmentRegs[int(m.segment)],
base, index, m.r.scale,
xedDispFromValue(m.r.disp), bytesToBits(memSize));
}
xed_encoder_operand_t toXedOperand(const RIPRelativeRef& r, int memSize) {
return xed_mem_bd(XED_REG_RIP, xedDispFromValue(r.r.disp, sz::dword),
bytesToBits(memSize));
}
xed_encoder_operand_t toXedOperand(CodeAddress address, int size) {
return xed_relbr(safe_cast<int32_t>((int64_t)address), bytesToBits(size));
}
template<typename immtype>
xed_encoder_operand_t toXedOperand(const immtype& immed, int immSizes) {
assert((immSizes != 0) &&
(immSizes & ~(sz::byte | sz::word | sz::dword | sz::qword)) == 0);
if ((immSizes & (immSizes - 1)) != 0) {
immSizes = reduceImmSize(immed.q(), immSizes);
}
return xed_imm0(XedImmValue(immed, immSizes).uq, bytesToBits(immSizes));
}
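  // Picks the smallest allowed width that still represents the value, so
  // e.g. reduceImmSize(1, sz::dword | sz::byte) yields sz::byte, while
  // reduceImmSize(0x12345678, sz::dword | sz::byte) yields sz::dword.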
ALWAYS_INLINE
int reduceImmSize(int64_t value, int allowedSizes) {
while (allowedSizes) {
int crtSize = (allowedSizes & -allowedSizes);
if (crtSize == sz::qword || deltaFits(value, crtSize)) {
return crtSize;
}
allowedSizes ^= crtSize;
}
assertx(false && "Could not find an optimal size for Immed");
return sz::nosize;
}
  /*
   * Cache encoded instruction lengths, one cache per xedInstr context.
   * This helps when emitting instructions whose length must be known in
   * advance (such as when one of the operands is a RIPRelativeRef), by
   * caching the size of the instruction and removing the need to call
   * xedEmit twice each time (once to get the size, and once to actually
   * emit the instruction).
   */
typedef tbb::concurrent_unordered_map<int32_t, uint32_t> XedLenCache;
template<typename F> ALWAYS_INLINE
uint32_t xedCacheLen(XedLenCache* lenCache, F xedFunc, uint32_t key) {
auto res = lenCache->find(key);
if (res != lenCache->end()) {
return res->second;
}
auto instrLen = xedFunc();
lenCache->insert({key, instrLen});
return instrLen;
}
static constexpr uint32_t xedLenCacheKey(xed_iclass_enum_t instr,
uint32_t size) {
// 16 bits should fit a xed_iclass_enum_t value (there are currently ~1560
// distinct values).
return uint32_t(instr) | (size << 16);
}
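  /*
   * Typical use of the cache (see the RIP-relative wrappers below):
   * encode the instruction once against dest() purely to learn its
   * length, cache that length keyed by xedLenCacheKey, then emit for
   * real with the displacement adjusted to be relative to the end of
   * the instruction.
   */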
  // xedInstr* wrappers
#define XED_WRAP_IMPL() \
XED_WRAP_X(64) \
XED_WRAP_X(32) \
XED_WRAP_X(16) \
XED_WRAP_X(8)
// instr(reg)
#define XED_INSTR_WRAPPER_IMPL(bitsize) \
ALWAYS_INLINE \
void xedInstrR(xed_iclass_enum_t instr, const Reg##bitsize& r) { \
xedEmit(instr, toXedOperand(r), bitsize); \
}
#define XED_WRAP_X XED_INSTR_WRAPPER_IMPL
XED_WRAP_IMPL()
#undef XED_WRAP_X
// instr(imm, reg)
#define XED_INSTIR_WRAPPER_IMPL(bitsize) \
ALWAYS_INLINE \
void xedInstrIR(xed_iclass_enum_t instr, const Immed& i, \
const Reg##bitsize& r, \
int immSize = bitsToBytes(bitsize)) { \
xedEmit(instr, toXedOperand(r), toXedOperand(i, immSize), \
bitsize); \
  }
#define XED_WRAP_X XED_INSTIR_WRAPPER_IMPL
XED_WRAP_IMPL()
#undef XED_WRAP_X
ALWAYS_INLINE
void xedInstrIR(xed_iclass_enum_t instr, const Immed64& i, const Reg64& r) {
xedEmit(instr, toXedOperand(r), toXedOperand(i, sz::qword),
bytesToBits(sz::qword));
}
ALWAYS_INLINE
void xedInstrIR(xed_iclass_enum_t instr, const Immed64& i, const Reg64& r,
int immSize) {
xedEmit(instr, toXedOperand(r), toXedOperand(i, immSize),
bytesToBits(sz::qword));
}
ALWAYS_INLINE
void xedInstrIR(xed_iclass_enum_t instr, const Immed& i,
const RegXMM& r, int immSize) {
xedEmit(instr, toXedOperand(r), toXedOperand(i, immSize));
}
// instr(reg, reg)
#define XED_INSTRR_WRAPPER_IMPL(bitsize) \
ALWAYS_INLINE \
void xedInstrRR(xed_iclass_enum_t instr, const Reg##bitsize& r1, \
const Reg##bitsize& r2) { \
xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bitsize); \
}
#define XED_WRAP_X XED_INSTRR_WRAPPER_IMPL
XED_WRAP_IMPL()
#undef XED_WRAP_X
ALWAYS_INLINE
void xedInstrRR_CL(xed_iclass_enum_t instr, const Reg64& r) {
xedEmit(instr, toXedOperand(r), toXedOperand(XED_REG_CL),
bytesToBits(sz::qword));
}
ALWAYS_INLINE
void xedInstrRR(xed_iclass_enum_t instr, const Reg8& r1, const Reg32& r2,
int size = sz::dword) {
xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bytesToBits(size));
}
ALWAYS_INLINE
void xedInstrRR(xed_iclass_enum_t instr, const Reg16& r1, const Reg32& r2,
int size = sz::dword) {
xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bytesToBits(size));
}
ALWAYS_INLINE
void xedInstrRR(xed_iclass_enum_t instr, const Reg8& r1, const Reg64& r2,
int size = sz::qword) {
xedEmit(instr, toXedOperand(r2), toXedOperand(r1), bytesToBits(size));
}
ALWAYS_INLINE
void xedInstrRR(xed_iclass_enum_t instr, const Reg64& r1, const RegXMM& r2) {
xedEmit(instr, toXedOperand(r2), toXedOperand(r1));
}
ALWAYS_INLINE
void xedInstrRR(xed_iclass_enum_t instr, const RegXMM& r1, const Reg64& r2) {
xedEmit(instr, toXedOperand(r2), toXedOperand(r1));
}
  // instr(xmm1, xmm2) wrappers pass their operands straight through:
  // unlike the GP-register overloads above, callers supply (destination,
  // source), so the swap relative to AT&T order happens at the call site.
ALWAYS_INLINE
void xedInstrRR(xed_iclass_enum_t instr, const RegXMM& r1, const RegXMM& r2) {
xedEmit(instr, toXedOperand(r1), toXedOperand(r2));
}
// instr(imm)
ALWAYS_INLINE
void xedInstrI(xed_iclass_enum_t instr, const Immed& i, int immSize) {
xed_encoder_operand_t op = toXedOperand(i, immSize);
xedEmit(instr, op, op.width_bits);
}
// instr(mem)
ALWAYS_INLINE
void xedInstrM(xed_iclass_enum_t instr, const MemoryRef& m,
int size = sz::qword) {
xedEmit(instr, toXedOperand(m, size), bytesToBits(size));
}
ALWAYS_INLINE
void xedInstrM(xed_iclass_enum_t instr, RIPRelativeRef m,
int size = sz::qword) {
static XedLenCache lenCache;
auto instrLen = xedCacheLen(
&lenCache,
[&]() {
return xedEmit(instr, toXedOperand(nullrip, size),
bytesToBits(size), dest());
}, xedLenCacheKey(instr, 0));
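    // m.r.disp arrives holding the absolute target address; convert it to
    // a RIP-relative displacement by subtracting the address of the end
    // of this instruction (frontier + encoded length).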
m.r.disp -= ((int64_t)frontier() + (int64_t)instrLen);
xedEmit(instr, toXedOperand(m, size), bytesToBits(size));
}
// instr(imm, mem)
ALWAYS_INLINE
void xedInstrIM(xed_iclass_enum_t instr, const Immed& i, const MemoryRef& m,
int size = sz::qword) {
assert(size && (size & (size - 1)) == 0);
xedEmit(instr, toXedOperand(m, size), toXedOperand(i, size),
bytesToBits(size));
}
ALWAYS_INLINE
void xedInstrIM(xed_iclass_enum_t instr, const Immed& i, const MemoryRef& m,
int immSize, int memSize) {
xedEmit(instr, toXedOperand(m, memSize), toXedOperand(i, immSize),
bytesToBits(memSize));
}
// instr(mem, reg)
#define XED_INSTMR_WRAPPER_IMPL(bitsize) \
ALWAYS_INLINE \
void xedInstrMR(xed_iclass_enum_t instr, const MemoryRef& m, \
const Reg##bitsize& r, \
int memSize = bitsToBytes(bitsize)) { \
xedEmit(instr, toXedOperand(r), toXedOperand(m, memSize), bitsize); \
} \
\
ALWAYS_INLINE \
void xedInstrMR(xed_iclass_enum_t instr, RIPRelativeRef m, \
const Reg##bitsize& r) { \
static XedLenCache lenCache; \
auto instrLen = xedCacheLen( \
&lenCache, \
[&] { \
return xedEmit( \
instr, toXedOperand(r), \
toXedOperand(nullrip, \
bitsToBytes(bitsize)), \
bitsize, dest()); \
}, xedLenCacheKey(instr, 0)); \
m.r.disp -= ((int64_t)frontier() + (int64_t)instrLen); \
xedEmit(instr, toXedOperand(r), \
toXedOperand(m, bitsToBytes(bitsize)), bitsize); \
}
#define XED_WRAP_X XED_INSTMR_WRAPPER_IMPL
XED_WRAP_IMPL()
#undef XED_WRAP_X
ALWAYS_INLINE
void xedInstrMR(xed_iclass_enum_t instr, const MemoryRef& m,
const RegXMM& r, int memSize = sz::qword) {
xedEmit(instr, toXedOperand(r), toXedOperand(m, memSize));
}
ALWAYS_INLINE
void xedInstrMR(xed_iclass_enum_t instr, RIPRelativeRef m,
const RegXMM& r, int memSize = sz::qword) {
static XedLenCache lenCache;
auto instrLen = xedCacheLen(
&lenCache,
[&]() {
return xedEmit(
instr, toXedOperand(r),
toXedOperand(nullrip, memSize),
0, dest());
}, xedLenCacheKey(instr, 0));
m.r.disp -= ((int64_t)frontier() + (int64_t)instrLen);
xedEmit(instr, toXedOperand(r), toXedOperand(m, memSize));
}
// instr(reg, mem)
#define XED_INSTRM_WRAPPER_IMPL(bitsize) \
ALWAYS_INLINE \
void xedInstrRM(xed_iclass_enum_t instr, const Reg##bitsize& r, \
const MemoryRef& m) { \
xedEmit(instr, toXedOperand(m, bitsToBytes(bitsize)), \
toXedOperand(r), bitsize); \
}
#define XED_WRAP_X XED_INSTRM_WRAPPER_IMPL
XED_WRAP_IMPL()
#undef XED_WRAP_X
ALWAYS_INLINE
void xedInstrRM(xed_iclass_enum_t instr, const RegXMM& r,
const MemoryRef& m, int memSize = sz::qword) {
xedEmit(instr, toXedOperand(m, memSize), toXedOperand(r));
}
// instr(xmm, xmm, imm)
ALWAYS_INLINE
void xedInstrIRR(xed_iclass_enum_t instr, const RegXMM& r1, const RegXMM& r2,
const Immed& i, int immSize) {
xedEmit(instr, toXedOperand(r1), toXedOperand(r2),
toXedOperand(i, immSize));
}
// instr(relbr)
void xedInstrRelBr(xed_iclass_enum_t instr,
CodeAddress destination, int size) {
static XedLenCache lenCache;
auto instrLen = xedCacheLen(
&lenCache,
[&]() {
return xedEmit(instr, toXedOperand((CodeAddress)0,
size), 0, dest());
}, xedLenCacheKey(instr, size));
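    // Relative branches are encoded relative to the end of the
    // instruction, so the operand is the distance from frontier() plus
    // the (cached) instruction length.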
auto target = destination - (frontier() + instrLen);
xedEmit(instr, toXedOperand((CodeAddress)target, size));
}
// instr()
ALWAYS_INLINE
void xedInstr(xed_iclass_enum_t instr, int size = sz::qword) {
xedEmit(instr, bytesToBits(size));
}
};
}