in vm/port/src/encoder/ia32_em64t/enc_tabl.cpp [1678:1867]
void EncoderBase::buildMnemonicDesc(const MnemonicInfo * minfo)
{
MnemonicDesc& mdesc = mnemonics[minfo->mn];
mdesc.mn = minfo->mn;
mdesc.flags = minfo->flags;
mdesc.roles = minfo->roles;
mdesc.name = minfo->name;
//
// fill the used opcodes
//
for (unsigned i=0, oindex=0; i<COUNTOF(minfo->opcodes); i++) {
const OpcodeInfo& oinfo = minfo->opcodes[i];
OpcodeDesc& odesc = opcodes[minfo->mn][oindex];
// last opcode ?
if (oinfo.opcode[0] == OpcodeByteKind_LAST) {
// mark the opcode 'last', exit
odesc.opcode_len = 0;
odesc.last = 1;
break;
}
odesc.last = 0;
#ifdef _EM64T_
if (oinfo.platf == OpcodeInfo::ia32) { continue; }
if (oinfo.platf == OpcodeInfo::decoder32) { continue; }
#else
if (oinfo.platf == OpcodeInfo::em64t) { continue; }
if (oinfo.platf == OpcodeInfo::decoder64) { continue; }
#endif
if (oinfo.platf == OpcodeInfo::decoder64 ||
oinfo.platf == OpcodeInfo::decoder32) {
odesc.platf = OpcodeInfo::decoder;
}
else {
odesc.platf = (char)oinfo.platf;
}
//
// fill out opcodes
//
unsigned j = 0;
odesc.opcode_len = 0;
for(; oinfo.opcode[j]; j++) {
unsigned opcod = oinfo.opcode[j];
unsigned kind = opcod&OpcodeByteKind_KindMask;
if (kind == OpcodeByteKind_REX_W) {
odesc.opcode[odesc.opcode_len++] = (unsigned char)0x48;
continue;
}
else if(kind != 0 && kind != OpcodeByteKind_ZeroOpcodeByte) {
break;
}
unsigned lowByte = (opcod & OpcodeByteKind_OpcodeMask);
odesc.opcode[odesc.opcode_len++] = (unsigned char)lowByte;
}
assert(odesc.opcode_len<5);
odesc.aux0 = odesc.aux1 = 0;
if (oinfo.opcode[j] != 0) {
odesc.aux0 = oinfo.opcode[j];
assert((odesc.aux0 & OpcodeByteKind_KindMask) != 0);
++j;
if(oinfo.opcode[j] != 0) {
odesc.aux1 = oinfo.opcode[j];
assert((odesc.aux1 & OpcodeByteKind_KindMask) != 0);
}
}
else if (oinfo.roles.count>=2) {
if (((oinfo.opnds[0].kind&OpndKind_Mem) &&
(isRegKind(oinfo.opnds[1].kind))) ||
((oinfo.opnds[1].kind&OpndKind_Mem) &&
(isRegKind(oinfo.opnds[0].kind)))) {
// Example: MOVQ xmm1, xmm/m64 has only opcodes
// same with SHRD
// Adding fake /r
odesc.aux0 = _r;
}
}
else if (oinfo.roles.count==1) {
if (oinfo.opnds[0].kind&OpndKind_Mem) {
// Example: SETcc r/m8, adding fake /0
odesc.aux0 = _0;
}
}
// check imm
if (oinfo.roles.count > 0 &&
(oinfo.opnds[0].kind == OpndKind_Imm ||
oinfo.opnds[oinfo.roles.count-1].kind == OpndKind_Imm)) {
// Example: CALL cd, PUSH imm32 - they fit both opnds[0] and
// opnds[oinfo.roles.count-1].
// The A3 opcode fits only opnds[0] - it's currently have
// MOV imm32, EAX. Looks ridiculous, but this is how the
// moffset is currently implemented. Will need to fix together
// with other usages of moff.
// adding fake /cd or fake /id
unsigned imm_opnd_index =
oinfo.opnds[0].kind == OpndKind_Imm ? 0 : oinfo.roles.count-1;
OpndSize sz = oinfo.opnds[imm_opnd_index].size;
unsigned imm_encode, coff_encode;
if (sz==OpndSize_8) {imm_encode = ib; coff_encode=cb; }
else if (sz==OpndSize_16) {imm_encode = iw; coff_encode=cw;}
else if (sz==OpndSize_32) {imm_encode = id; coff_encode=cd; }
else if (sz==OpndSize_64) {imm_encode = io; coff_encode=0xCC; }
else { assert(false); imm_encode=0xCC; coff_encode=0xCC; }
if (odesc.aux1 == 0) {
if (odesc.aux0==0) {
odesc.aux0 = imm_encode;
}
else {
if (odesc.aux0 != imm_encode && odesc.aux0 != coff_encode) {
odesc.aux1 = imm_encode;
}
}
}
else {
assert(odesc.aux1==imm_encode);
}
}
assert(sizeof(odesc.opnds) == sizeof(oinfo.opnds));
memcpy(odesc.opnds, oinfo.opnds, sizeof(odesc.opnds));
odesc.roles = oinfo.roles;
odesc.first_opnd = 0;
if (odesc.opnds[0].reg != RegName_Null) {
++odesc.first_opnd;
if (odesc.opnds[1].reg != RegName_Null) {
++odesc.first_opnd;
}
}
if (odesc.platf == OpcodeInfo::decoder) {
// if the opcode is only for decoding info, then do not hash it.
++oindex;
continue;
}
//
// check whether the operand info is a mask (i.e. r_m*).
// in this case, split the info to have separate entries for 'r'
// and for 'm'.
// the good news is that there can be only one such operand.
//
int opnd2split = -1;
for (unsigned k=0; k<oinfo.roles.count; k++) {
if ((oinfo.opnds[k].kind & OpndKind_Mem) &&
(OpndKind_Mem != oinfo.opnds[k].kind)) {
opnd2split = k;
break;
}
};
if (opnd2split == -1) {
// not a mask, hash it, store it, continue.
unsigned short hash = getHash(&oinfo);
opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
++oindex;
continue;
};
OpcodeInfo storeItem = oinfo;
unsigned short hash;
// remove the memory part of the mask, and store only 'r' part
storeItem.opnds[opnd2split].kind = (OpndKind)(storeItem.opnds[opnd2split].kind & ~OpndKind_Mem);
hash = getHash(&storeItem);
if (opcodesHashMap[minfo->mn][hash] == NOHASH) {
opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
}
// else {
// do not overwrite if there is something there, just check that operands match
// the reason is that for some instructions there are several possibilities:
// say 'DEC r' may be encode as either '48+r' or 'FF /1', and I believe
// the first one is better for 'dec r'.
// as we're currently processing an opcode with memory part in operand,
// leave already filled items intact, so if there is 'OP reg' there, this
// better choice will be left in the table instead of 'OP r_m'
// }
// compute hash of memory-based operand, 'm' part in 'r_m'
storeItem.opnds[opnd2split].kind = OpndKind_Mem;
hash = getHash(&storeItem);
// should not happen: for the r_m opcodes, there is a possibility
// that hash value of 'r' part intersects with 'OP r' value, but it's
// impossible for 'm' part.
assert(opcodesHashMap[minfo->mn][hash] == NOHASH);
opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
++oindex;
}
}