void EncoderBase::buildMnemonicDesc()

in vm/port/src/encoder/ia32_em64t/enc_tabl.cpp [1678:1867]
137 lines of code
42 McCabe index (conditional complexity)

/**
 * Fills the encoder tables for a single mnemonic from its static description.
 *
 * The mnemonic-level fields (mn, flags, roles, name) are copied into
 * mnemonics[minfo->mn]. Then minfo->opcodes is walked until an entry whose
 * first opcode element is OpcodeByteKind_LAST (or until the array is
 * exhausted). Every entry that is not skipped for the current build platform
 * is converted into an OpcodeDesc stored in opcodes[minfo->mn][oindex], and,
 * unless it is a decoder-only entry, its index is registered in
 * opcodesHashMap[minfo->mn][] under the hash of its operand description.
 *
 * @param minfo description of the mnemonic to register; dereferenced
 *              unconditionally, so it must not be null.
 */
void EncoderBase::buildMnemonicDesc(const MnemonicInfo * minfo)
{
    MnemonicDesc& mdesc = mnemonics[minfo->mn];
    mdesc.mn = minfo->mn;
    mdesc.flags = minfo->flags;
    mdesc.roles = minfo->roles;
    mdesc.name = minfo->name;
    
    //
    // fill the used opcodes
    //
    // 'i' walks the source table; 'oindex' is the next free slot in the
    // destination table. They diverge whenever a source entry is skipped
    // for the current platform.
    for (unsigned i=0, oindex=0; i<COUNTOF(minfo->opcodes); i++) {
    
        const OpcodeInfo& oinfo = minfo->opcodes[i];
        // NOTE: odesc refers to slot 'oindex'; if this entry is skipped below
        // (via 'continue' without ++oindex), the next entry reuses and
        // overwrites the same slot.
        OpcodeDesc& odesc = opcodes[minfo->mn][oindex];
        // last opcode ?
        if (oinfo.opcode[0] == OpcodeByteKind_LAST) {
            // mark the opcode 'last', exit
            odesc.opcode_len = 0;
            odesc.last = 1;
            break;
        }
        odesc.last = 0;
        // Drop the entries that belong to the other platform: on _EM64T_
        // builds the ia32 and decoder32 entries, otherwise the em64t and
        // decoder64 entries.
#ifdef _EM64T_
        if (oinfo.platf == OpcodeInfo::ia32) { continue; }
        if (oinfo.platf == OpcodeInfo::decoder32) { continue; }
#else
        if (oinfo.platf == OpcodeInfo::em64t) { continue; }
        if (oinfo.platf == OpcodeInfo::decoder64) { continue; }
#endif
        // Whichever decoder-only flavour survived the filter above
        // (decoder64 on _EM64T_, decoder32 otherwise) is stored as the
        // generic OpcodeInfo::decoder; any other platf value is copied as is.
        if (oinfo.platf == OpcodeInfo::decoder64 ||
            oinfo.platf == OpcodeInfo::decoder32) {
             odesc.platf = OpcodeInfo::decoder;
        }
        else {
            odesc.platf = (char)oinfo.platf;
        }
        //
        // fill out opcodes
        //
        // Copy the leading literal opcode bytes into odesc.opcode. The scan
        // stops at a zero element or at the first element whose kind bits are
        // set to something other than REX_W / ZeroOpcodeByte; 'j' is left
        // pointing at that element.
        unsigned j = 0;
        odesc.opcode_len = 0;
        for(; oinfo.opcode[j]; j++) {
            unsigned opcod = oinfo.opcode[j];
            unsigned kind = opcod&OpcodeByteKind_KindMask;
            if (kind == OpcodeByteKind_REX_W) {
                // REX_W is emitted as the fixed byte 0x48.
                odesc.opcode[odesc.opcode_len++] = (unsigned char)0x48;
                continue;
            }
            else if(kind != 0 && kind != OpcodeByteKind_ZeroOpcodeByte) {
                break;
            }
            // Plain byte, or ZeroOpcodeByte (presumably the way to express a
            // literal 0x00 without terminating the list - confirm against the
            // OpcodeByteKind definition): keep only the opcode-mask bits.
            unsigned lowByte = (opcod & OpcodeByteKind_OpcodeMask);
            odesc.opcode[odesc.opcode_len++] = (unsigned char)lowByte;
        }
        // NOTE(review): this check runs after the bytes were already written,
        // so it only detects an over-long opcode, it does not prevent the write.
        assert(odesc.opcode_len<5);
        // Up to two non-literal elements following the opcode bytes become
        // aux0 and aux1; both are expected to carry non-zero kind bits.
        odesc.aux0 = odesc.aux1 = 0;
        if (oinfo.opcode[j] != 0) {
            odesc.aux0 = oinfo.opcode[j];
            assert((odesc.aux0 & OpcodeByteKind_KindMask) != 0);
            ++j;
            if(oinfo.opcode[j] != 0) {
                odesc.aux1 = oinfo.opcode[j];
                assert((odesc.aux1 & OpcodeByteKind_KindMask) != 0);
            }
        }
        // No explicit aux element was given: synthesize one from the operands.
        else if (oinfo.roles.count>=2) {
            // One of the first two operands may be memory and the other one
            // is a register kind.
            if (((oinfo.opnds[0].kind&OpndKind_Mem) && 
                 (isRegKind(oinfo.opnds[1].kind))) ||
                ((oinfo.opnds[1].kind&OpndKind_Mem) && 
                 (isRegKind(oinfo.opnds[0].kind)))) {
                 // Example: MOVQ xmm1, xmm/m64 has only opcodes
                 // same with SHRD
                 // Adding fake /r
                 odesc.aux0 = _r;
            }
        }
        else if (oinfo.roles.count==1) {
            if (oinfo.opnds[0].kind&OpndKind_Mem) {
                 // Example: SETcc r/m8, adding fake /0
                 odesc.aux0 = _0;
            }
        }
        // check imm
        // Only the first and the last operand are examined, and the kind must
        // be exactly OpndKind_Imm (not a mask containing it).
        if (oinfo.roles.count > 0 && 
            (oinfo.opnds[0].kind == OpndKind_Imm ||
            oinfo.opnds[oinfo.roles.count-1].kind == OpndKind_Imm)) {
            // Example: CALL cd, PUSH imm32 - they fit both opnds[0] and
            // opnds[oinfo.roles.count-1].
            // The A3 opcode fits only opnds[0] - it is currently described as
            // MOV imm32, EAX. Looks ridiculous, but this is how the
            // moffset is currently implemented. Will need to fix together
            // with other usages of moff.
            // adding fake /cd or fake /id
            unsigned imm_opnd_index =
                oinfo.opnds[0].kind == OpndKind_Imm ? 0 : oinfo.roles.count-1;
            OpndSize sz = oinfo.opnds[imm_opnd_index].size;
            // Pick the immediate and code-offset encodings matching the
            // operand size. 0xCC is used as a filler where no encoding is
            // assigned (64-bit code offset, unexpected size).
            unsigned imm_encode, coff_encode;
            if (sz==OpndSize_8) {imm_encode = ib; coff_encode=cb; }
            else if (sz==OpndSize_16) {imm_encode = iw; coff_encode=cw;}
            else if (sz==OpndSize_32) {imm_encode = id; coff_encode=cd; }
            else if (sz==OpndSize_64) {imm_encode = io; coff_encode=0xCC; }
            else { assert(false); imm_encode=0xCC; coff_encode=0xCC; }
            if (odesc.aux1 == 0) {
                if (odesc.aux0==0) {
                    // no aux info at all - the immediate goes into aux0
                    odesc.aux0 = imm_encode;
                }
                else {
                    // aux0 is taken by something else (e.g. /r or /digit);
                    // put the immediate into aux1 unless aux0 already
                    // describes it (as an immediate or as a code offset).
                    if (odesc.aux0 != imm_encode && odesc.aux0 != coff_encode) {
                        odesc.aux1 = imm_encode;
                    }
                }
            }
            else {
                // both aux slots were given explicitly - aux1 must be the
                // immediate of the matching size.
                assert(odesc.aux1==imm_encode);
            }
            
        }
        
        // Copy the operand descriptions and roles verbatim.
        assert(sizeof(odesc.opnds) == sizeof(oinfo.opnds));
        memcpy(odesc.opnds, oinfo.opnds, sizeof(odesc.opnds));
        odesc.roles = oinfo.roles;
        // first_opnd = number of leading operands (at most two are checked)
        // that have a fixed register assigned (reg != RegName_Null).
        // Presumably these operands are implicit and are skipped when the
        // instruction is encoded - confirm against the users of first_opnd.
        odesc.first_opnd = 0;
        if (odesc.opnds[0].reg != RegName_Null) {
            ++odesc.first_opnd;
            if (odesc.opnds[1].reg != RegName_Null) {
                ++odesc.first_opnd;
            }
        }

        if (odesc.platf == OpcodeInfo::decoder) {
            // if the opcode is only for decoding info, then do not hash it.
            // The descriptor still occupies a slot, hence the ++oindex.
            ++oindex;
            continue;
        }
       
        //
        // check whether the operand info is a mask (i.e. r_m*).
        // in this case, split the info to have separate entries for 'r' 
        // and for 'm'.
        // the good news is that there can be only one such operand.
        // 
        // An operand is a mask if its kind includes OpndKind_Mem together
        // with some other bits; the first such operand is picked.
        int opnd2split = -1;
        for (unsigned k=0; k<oinfo.roles.count; k++) {
            if ((oinfo.opnds[k].kind & OpndKind_Mem) &&
                (OpndKind_Mem != oinfo.opnds[k].kind)) {
                opnd2split = k;
                break;
            }
        };

        if (opnd2split == -1) {
            // not a mask, hash it, store it, continue.
            // Note that, unlike the 'r' part below, an existing hash entry is
            // overwritten here without any check.
            // NOTE(review): oindex is narrowed to unsigned char - assumes
            // fewer than 256 opcodes per mnemonic; confirm.
            unsigned short hash = getHash(&oinfo);
            opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
            ++oindex;
            continue;
        };

        // Work on a copy so that the operand kind can be rewritten for
        // hashing without touching the source table.
        OpcodeInfo storeItem = oinfo;
        unsigned short hash;

        // remove the memory part of the mask, and store only 'r' part
        storeItem.opnds[opnd2split].kind = (OpndKind)(storeItem.opnds[opnd2split].kind & ~OpndKind_Mem);
        hash = getHash(&storeItem);
        // NOHASH marks a free hash slot (the table is presumably pre-filled
        // with it elsewhere - confirm).
        if (opcodesHashMap[minfo->mn][hash] == NOHASH) {
            opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
        }
        // else {
        // do not overwrite if there is something there, just check that operands match
        // the reason is that for some instructions there are several possibilities:
        // say 'DEC r' may be encoded as either '48+r' or 'FF /1', and I believe 
        // the first one is better for 'dec r'.
        // as we're currently processing an opcode with memory part in operand, 
        // leave already filled items intact, so if there is 'OP reg' there, this
        // better choice will be left in the table instead of 'OP r_m'
        // }

        // compute hash of memory-based operand, 'm' part in 'r_m'
        storeItem.opnds[opnd2split].kind = OpndKind_Mem;
        hash = getHash(&storeItem);
        // should not happen: for the r_m opcodes, there is a possibility 
        // that hash value of 'r' part intersects with 'OP r' value, but it's 
        // impossible for 'm' part.
        assert(opcodesHashMap[minfo->mn][hash] == NOHASH);
        opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex;
        
        // both the 'r' and the 'm' hash entries refer to the same descriptor
        ++oindex;
    }
}