size_t emitter::emitOutputInstr()

in src/coreclr/jit/emitarm.cpp [5756:6721]


size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
{
    BYTE*         dst           = *dp;
    BYTE*         odst          = dst;
    code_t        code          = 0;
    size_t        sz            = 0;
    instruction   ins           = id->idIns();
    insFormat     fmt           = id->idInsFmt();
    emitAttr      size          = id->idOpSize();
    unsigned char callInstrSize = 0;

#ifdef DEBUG
    bool dspOffs = emitComp->opts.dspGCtbls || !emitComp->opts.disDiffable;
#endif // DEBUG

    assert(REG_NA == (int)REG_NA);

    VARSET_TP GCvars(VarSetOps::UninitVal());
    regMaskTP gcrefRegs;
    regMaskTP byrefRegs;

    /* What instruction format have we got? */

    switch (fmt)
    {
        int   imm;
        BYTE* addr;

        case IF_T1_A: // T1_A    ................
            sz   = SMALL_IDSC_SIZE;
            code = emitInsCode(ins, fmt);
            dst += emitOutput_Thumb1Instr(dst, code);
            break;

#ifdef FEATURE_ITINSTRUCTION
        case IF_T1_B: // T1_B    ........cccc....                                           cond
        {
            assert(id->idGCref() == GCT_NONE);
            target_ssize_t condcode = emitGetInsSC(id);
            dst                     = emitOutputIT(dst, ins, fmt, condcode);
            sz                      = SMALL_IDSC_SIZE;
        }
        break;
#endif // FEATURE_ITINSTRUCTION

        case IF_T1_C: // T1_C    .....iiiiinnnddd                       R1  R2              imm5
            sz   = SMALL_IDSC_SIZE;
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT1_D3(id->idReg1());
            code |= insEncodeRegT1_N3(id->idReg2());
            imm = insUnscaleImm(ins, imm);
            assert((imm & 0x001f) == imm);
            code |= (imm << 6);
            dst += emitOutput_Thumb1Instr(dst, code);
            break;

        case IF_T1_D0: // T1_D0   ........Dmmmmddd                       R1* R2*
            sz   = SMALL_IDSC_SIZE;
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT1_D4(id->idReg1());
            code |= insEncodeRegT1_M4(id->idReg2());
            dst += emitOutput_Thumb1Instr(dst, code);
            break;

        case IF_T1_E: // T1_E    ..........nnnddd                       R1  R2
            sz   = SMALL_IDSC_SIZE;
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT1_D3(id->idReg1());
            code |= insEncodeRegT1_N3(id->idReg2());
            dst += emitOutput_Thumb1Instr(dst, code);
            break;

        case IF_T1_F: // T1_F    .........iiiiiii                       SP                  imm7
            sz   = emitGetInstrDescSize(id);
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            assert((ins == INS_add) || (ins == INS_sub));
            assert((imm & 0x0003) == 0);
            imm >>= 2;
            assert((imm & 0x007F) == imm);
            code |= imm;
            dst += emitOutput_Thumb1Instr(dst, code);
            break;

        case IF_T1_G: // T1_G    .......iiinnnddd                       R1  R2              imm3
            sz   = SMALL_IDSC_SIZE;
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT1_D3(id->idReg1());
            code |= insEncodeRegT1_N3(id->idReg2());
            assert((imm & 0x0007) == imm);
            code |= (imm << 6);
            dst += emitOutput_Thumb1Instr(dst, code);
            break;

        case IF_T1_H: // T1_H    .......mmmnnnddd                       R1  R2  R3
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT1_D3(id->idReg1());
            code |= insEncodeRegT1_N3(id->idReg2());
            code |= insEncodeRegT1_M3(id->idReg3());
            dst += emitOutput_Thumb1Instr(dst, code);
            break;

        case IF_T1_I: // T1_I    ......i.iiiiiddd                       R1                  imm6
            assert(id->idIsBound());

            dst = emitOutputLJ(ig, dst, id);
            sz  = sizeof(instrDescJmp);
            break;

        case IF_T1_J0: // T1_J0   .....dddiiiiiiii                       R1                  imm8
        case IF_T1_J1: // T1_J1   .....dddiiiiiiii                       R1                  <regmask8>
        case IF_T1_J2: // T1_J2   .....dddiiiiiiii                       R1  SP              imm8
            sz   = emitGetInstrDescSize(id);
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT1_DI(id->idReg1());
            if (fmt == IF_T1_J2)
            {
                assert((ins == INS_add) || (ins == INS_ldr) || (ins == INS_str));
                assert((imm & 0x0003) == 0);
                imm >>= 2;
            }
            assert((imm & 0x00ff) == imm);
            code |= imm;
            dst += emitOutput_Thumb1Instr(dst, code);
            break;

        case IF_T1_L0: // T1_L0   ........iiiiiiii                                           imm8
        case IF_T1_L1: // T1_L1   .......Rrrrrrrrr                                           <regmask8>
            sz   = emitGetInstrDescSize(id);
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            if (fmt == IF_T1_L1)
            {
                assert((imm & 0x3) != 0x3);
                if (imm & 0x3)
                    code |= 0x0100; //  R bit
                imm >>= 2;
            }
            assert((imm & 0x00ff) == imm);
            code |= imm;
            dst += emitOutput_Thumb1Instr(dst, code);
            break;

        case IF_T2_A: // T2_A    ................ ................
            sz   = SMALL_IDSC_SIZE;
            code = emitInsCode(ins, fmt);
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_B: // T2_B    ................ ............iiii                          imm4
            sz   = SMALL_IDSC_SIZE;
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            assert((imm & 0x000F) == imm);
            code |= imm;
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_C0: // T2_C0   ...........Snnnn .iiiddddiishmmmm       R1  R2  R3      S, imm5, sh
        case IF_T2_C4: // T2_C4   ...........Snnnn ....dddd....mmmm       R1  R2  R3      S
        case IF_T2_C5: // T2_C5   ............nnnn ....dddd....mmmm       R1  R2  R3
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_D(id->idReg1());
            code |= insEncodeRegT2_N(id->idReg2());
            code |= insEncodeRegT2_M(id->idReg3());
            if (fmt != IF_T2_C5)
                code |= insEncodeSetFlags(id->idInsFlags());
            if (fmt == IF_T2_C0)
            {
                imm = emitGetInsSC(id);
                code |= insEncodeShiftCount(imm);
                code |= insEncodeShiftOpts(id->idInsOpt());
            }
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_C1: // T2_C1   ...........S.... .iiiddddiishmmmm       R1  R2          S, imm5, sh
        case IF_T2_C2: // T2_C2   ...........S.... .iiiddddii..mmmm       R1  R2          S, imm5
        case IF_T2_C6: // T2_C6   ................ ....dddd..iimmmm       R1  R2                   imm2
            sz   = SMALL_IDSC_SIZE;
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_D(id->idReg1());
            code |= insEncodeRegT2_M(id->idReg2());
            if (fmt == IF_T2_C6)
            {
                assert((imm & 0x0018) == imm);
                code |= (imm << 1);
            }
            else
            {
                code |= insEncodeSetFlags(id->idInsFlags());
                code |= insEncodeShiftCount(imm);
                if (fmt == IF_T2_C1)
                    code |= insEncodeShiftOpts(id->idInsOpt());
            }
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_C3: // T2_C3   ...........S.... ....dddd....mmmm       R1  R2          S
            sz   = SMALL_IDSC_SIZE;
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_D(id->idReg1());
            code |= insEncodeRegT2_M(id->idReg2());
            code |= insEncodeSetFlags(id->idInsFlags());
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_C7: // T2_C7   ............nnnn ..........shmmmm       R1  R2                   imm2
        case IF_T2_C8: // T2_C8   ............nnnn .iii....iishmmmm       R1  R2             imm5, sh
            sz   = SMALL_IDSC_SIZE;
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_N(id->idReg1());
            code |= insEncodeRegT2_M(id->idReg2());
            if (fmt == IF_T2_C7)
            {
                assert((imm & 0x0003) == imm);
                code |= (imm << 4);
            }
            else if (fmt == IF_T2_C8)
            {
                code |= insEncodeShiftCount(imm);
                code |= insEncodeShiftOpts(id->idInsOpt());
            }
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_C9: // T2_C9   ............nnnn ............mmmm       R1  R2
            sz   = SMALL_IDSC_SIZE;
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_N(id->idReg1());
            code |= insEncodeRegT2_M(id->idReg2());
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_C10: // T2_C10  ............mmmm ....dddd....mmmm       R1  R2
            sz   = SMALL_IDSC_SIZE;
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_D(id->idReg1());
            code |= insEncodeRegT2_M(id->idReg2());
            code |= insEncodeRegT2_N(id->idReg2());
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_D0: // T2_D0   ............nnnn .iiiddddii.wwwww       R1  R2             imm5, imm5
        case IF_T2_D1: // T2_D1   ................ .iiiddddii.wwwww       R1                 imm5, imm5
            sz   = SMALL_IDSC_SIZE;
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_D(id->idReg1());
            if (fmt == IF_T2_D0)
                code |= insEncodeRegT2_N(id->idReg2());
            code |= insEncodeBitFieldImm(imm);
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_E0: // T2_E0   ............nnnn tttt......shmmmm       R1  R2  R3               imm2
        case IF_T2_E1: // T2_E1   ............nnnn tttt............       R1  R2
        case IF_T2_E2: // T2_E2   ................ tttt............       R1
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_T(id->idReg1());
            if (fmt == IF_T2_E0)
            {
                sz = emitGetInstrDescSize(id);
                code |= insEncodeRegT2_N(id->idReg2());
                if (id->idIsLclVar())
                {
                    code |= insEncodeRegT2_M(codeGen->rsGetRsvdReg());
                    imm = 0;
                }
                else
                {
                    code |= insEncodeRegT2_M(id->idReg3());
                    imm = emitGetInsSC(id);
                    assert((imm & 0x0003) == imm);
                    code |= (imm << 4);
                }
            }
            else
            {
                sz = SMALL_IDSC_SIZE;
                if (fmt != IF_T2_E2)
                {
                    code |= insEncodeRegT2_N(id->idReg2());
                }
            }
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_F1: // T2_F1    ............nnnn ttttdddd....mmmm       R1  R2  R3  R4
            sz = emitGetInstrDescSize(id);
            ;
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_T(id->idReg1());
            code |= insEncodeRegT2_D(id->idReg2());
            code |= insEncodeRegT2_N(id->idReg3());
            code |= insEncodeRegT2_M(id->idReg4());
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_F2: // T2_F2    ............nnnn aaaadddd....mmmm       R1  R2  R3  R4
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_D(id->idReg1());
            code |= insEncodeRegT2_N(id->idReg2());
            code |= insEncodeRegT2_M(id->idReg3());
            code |= insEncodeRegT2_T(id->idReg4());
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_G0: // T2_G0   .......PU.W.nnnn ttttTTTTiiiiiiii       R1  R2  R3         imm8, PUW
        case IF_T2_G1: // T2_G1   ............nnnn ttttTTTT........       R1  R2  R3
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_T(id->idReg1());
            code |= insEncodeRegT2_D(id->idReg2());
            code |= insEncodeRegT2_N(id->idReg3());
            if (fmt == IF_T2_G0)
            {
                imm = emitGetInsSC(id);
                assert(unsigned_abs(imm) <= 0x00ff);
                code |= abs(imm);
                code |= insEncodePUW_G0(id->idInsOpt(), imm);
            }
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_H0: // T2_H0   ............nnnn tttt.PUWiiiiiiii       R1  R2             imm8, PUW
        case IF_T2_H1: // T2_H1   ............nnnn tttt....iiiiiiii       R1  R2             imm8
        case IF_T2_H2: // T2_H2   ............nnnn ........iiiiiiii       R1                 imm8
            sz   = emitGetInstrDescSize(id);
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_T(id->idReg1());

            if (fmt != IF_T2_H2)
                code |= insEncodeRegT2_N(id->idReg2());

            if (fmt == IF_T2_H0)
            {
                assert(unsigned_abs(imm) <= 0x00ff);
                code |= insEncodePUW_H0(id->idInsOpt(), imm);
                code |= unsigned_abs(imm);
            }
            else
            {
                assert((imm & 0x00ff) == imm);
                code |= imm;
            }
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_I0: // T2_I0   ..........W.nnnn rrrrrrrrrrrrrrrr       R1              W, imm16
        case IF_T2_I1: // T2_I1   ................ rrrrrrrrrrrrrrrr                          imm16
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            if (fmt == IF_T2_I0)
            {
                code |= insEncodeRegT2_N(id->idReg1());
                code |= (1 << 21); //  W bit
            }
            imm = emitGetInsSC(id);
            assert((imm & 0x3) != 0x3);
            if (imm & 0x2)
                code |= 0x8000; //  PC bit
            if (imm & 0x1)
                code |= 0x4000; //  LR bit
            imm >>= 2;
            assert(imm <= 0x1fff); //  13 bits
            code |= imm;
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_K1: // T2_K1   ............nnnn ttttiiiiiiiiiiii       R1  R2             imm12
        case IF_T2_K4: // T2_K4   ........U....... ttttiiiiiiiiiiii       R1  PC          U, imm12
        case IF_T2_K3: // T2_K3   ........U....... ....iiiiiiiiiiii       PC              U, imm12
            sz   = emitGetInstrDescSize(id);
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            if (fmt != IF_T2_K3)
            {
                code |= insEncodeRegT2_T(id->idReg1());
            }
            if (fmt == IF_T2_K1)
            {
                code |= insEncodeRegT2_N(id->idReg2());
                assert(imm <= 0xfff); //  12 bits
                code |= imm;
            }
            else
            {
                assert(unsigned_abs(imm) <= 0xfff); //  12 bits (signed)
                code |= abs(imm);
                if (imm >= 0)
                    code |= (1 << 23); //  U bit
            }
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_K2: // T2_K2   ............nnnn ....iiiiiiiiiiii       R1                 imm12
            sz   = emitGetInstrDescSize(id);
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_N(id->idReg1());
            assert(imm <= 0xfff); //  12 bits
            code |= imm;
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_L0: // T2_L0   .....i.....Snnnn .iiiddddiiiiiiii       R1  R2          S, imm8<<imm4
        case IF_T2_L1: // T2_L1   .....i.....S.... .iiiddddiiiiiiii       R1              S, imm8<<imm4
        case IF_T2_L2: // T2_L2   .....i......nnnn .iii....iiiiiiii       R1                 imm8<<imm4
            sz   = emitGetInstrDescSize(id);
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);

            if (fmt == IF_T2_L2)
                code |= insEncodeRegT2_N(id->idReg1());
            else
            {
                code |= insEncodeSetFlags(id->idInsFlags());
                code |= insEncodeRegT2_D(id->idReg1());
                if (fmt == IF_T2_L0)
                    code |= insEncodeRegT2_N(id->idReg2());
            }
            assert(isModImmConst(imm)); // Funky ARM imm encoding
            imm = encodeModImmConst(imm);
            assert(imm <= 0xfff); //  12 bits
            code |= (imm & 0x00ff);
            code |= (imm & 0x0700) << 4;
            code |= (imm & 0x0800) << 15;
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_M0: // T2_M0   .....i......nnnn .iiiddddiiiiiiii       R1  R2             imm12
            sz   = emitGetInstrDescSize(id);
            imm  = emitGetInsSC(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_D(id->idReg1());
            if (fmt == IF_T2_M0)
                code |= insEncodeRegT2_N(id->idReg2());
            imm = emitGetInsSC(id);
            assert(imm <= 0xfff); //  12 bits
            code |= (imm & 0x00ff);
            code |= (imm & 0x0700) << 4;
            code |= (imm & 0x0800) << 15;
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_N: // T2_N    .....i......iiii .iiiddddiiiiiiii       R1                 imm16
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_D(id->idReg1());
            imm = emitGetInsSC(id);
            if (id->idIsLclVar())
            {
                if (ins == INS_movw)
                {
                    imm &= 0xffff;
                }
                else
                {
                    assert(ins == INS_movt);
                    imm = (imm >> 16) & 0xffff;
                }
            }

            assert(!id->idIsReloc());
            code |= insEncodeImmT2_Mov(imm);
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_N2: // T2_N2   .....i......iiii .iiiddddiiiiiiii       R1                 imm16
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_D(id->idReg1());
            imm  = emitGetInsSC(id);
            addr = emitConsBlock + imm;
            if (!id->idIsReloc())
            {
                assert(sizeof(size_t) == sizeof(target_size_t));
                imm = (target_size_t)(size_t)addr;
                if (ins == INS_movw)
                {
                    imm &= 0xffff;
                }
                else
                {
                    assert(ins == INS_movt);
                    imm = (imm >> 16) & 0xffff;
                }
                code |= insEncodeImmT2_Mov(imm);
                dst += emitOutput_Thumb2Instr(dst, code);
            }
            else
            {
                assert((ins == INS_movt) || (ins == INS_movw));
                dst += emitOutput_Thumb2Instr(dst, code);
                if ((ins == INS_movt) && emitComp->info.compMatchedVM)
                    emitHandlePCRelativeMov32((void*)(dst - 8), addr);
            }
            break;

        case IF_T2_N3: // T2_N3   .....i......iiii .iiiddddiiiiiiii       R1                 imm16
            sz   = sizeof(instrDescReloc);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_D(id->idReg1());

            assert((ins == INS_movt) || (ins == INS_movw));
            assert(id->idIsReloc());

            addr = emitGetInsRelocValue(id);
            dst += emitOutput_Thumb2Instr(dst, code);
            if ((ins == INS_movt) && emitComp->info.compMatchedVM)
                emitHandlePCRelativeMov32((void*)(dst - 8), addr);
            break;

        case IF_T2_VFP3:
            // these are the binary operators
            // d = n - m
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_VectorN(id->idReg2(), size, true);
            code |= insEncodeRegT2_VectorM(id->idReg3(), size, true);
            code |= insEncodeRegT2_VectorD(id->idReg1(), size, true);
            if (size == EA_8BYTE)
                code |= 1 << 8;
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_VFP2:
        {
            emitAttr srcSize;
            emitAttr dstSize;
            size_t   szCode = 0;

            switch (ins)
            {
                case INS_vcvt_i2d:
                case INS_vcvt_u2d:
                case INS_vcvt_f2d:
                    srcSize = EA_4BYTE;
                    dstSize = EA_8BYTE;
                    break;

                case INS_vcvt_d2i:
                case INS_vcvt_d2u:
                case INS_vcvt_d2f:
                    srcSize = EA_8BYTE;
                    dstSize = EA_4BYTE;
                    break;

                case INS_vmov:
                case INS_vabs:
                case INS_vsqrt:
                case INS_vcmp:
                case INS_vneg:
                    if (id->idOpSize() == EA_8BYTE)
                        szCode |= (1 << 8);
                    FALLTHROUGH;

                default:
                    srcSize = dstSize = id->idOpSize();
                    break;
            }

            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            code |= szCode;
            code |= insEncodeRegT2_VectorD(id->idReg1(), dstSize, true);
            code |= insEncodeRegT2_VectorM(id->idReg2(), srcSize, true);

            dst += emitOutput_Thumb2Instr(dst, code);
            break;
        }

        case IF_T2_VLDST:
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT2_N(id->idReg2());
            code |= insEncodeRegT2_VectorD(id->idReg1(), size, true);

            imm = emitGetInsSC(id);
            if (imm < 0)
                imm = -imm; // bit 23 at 0 means negate
            else
                code |= 1 << 23; // set the positive bit

            // offset is +/- 1020
            assert(!(imm % 4));
            assert(imm >> 10 == 0);
            code |= imm >> 2;
            // bit 8 is set for doubles
            if (id->idOpSize() == EA_8BYTE)
                code |= (1 << 8);
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_VMOVD:
            // 3op assemble a double from two int regs (or back)
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            if (ins == INS_vmov_i2d)
            {
                code |= insEncodeRegT2_VectorM(id->idReg1(), size, true);
                code |= id->idReg2() << 12;
                code |= id->idReg3() << 16;
            }
            else
            {
                assert(ins == INS_vmov_d2i);
                code |= id->idReg1() << 12;
                code |= id->idReg2() << 16;
                code |= insEncodeRegT2_VectorM(id->idReg3(), size, true);
            }
            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T2_VMOVS:
            // 2op assemble a float from one int reg (or back)
            sz   = emitGetInstrDescSize(id);
            code = emitInsCode(ins, fmt);
            if (ins == INS_vmov_f2i)
            {
                code |= insEncodeRegT2_VectorN(id->idReg2(), EA_4BYTE, true);
                code |= id->idReg1() << 12;
            }
            else
            {
                assert(ins == INS_vmov_i2f);
                code |= insEncodeRegT2_VectorN(id->idReg1(), EA_4BYTE, true);
                code |= id->idReg2() << 12;
            }

            dst += emitOutput_Thumb2Instr(dst, code);
            break;

        case IF_T1_J3: // T1_J3   .....dddiiiiiiii                        R1  PC             imm8
        case IF_T2_M1: // T2_M1   .....i.......... .iiiddddiiiiiiii       R1  PC             imm12
            assert(id->idGCref() == GCT_NONE);
            assert(id->idIsBound());

            dst = emitOutputLJ(ig, dst, id);
            sz  = sizeof(instrDescLbl);
            break;

        case IF_T1_K:  // T1_K    ....cccciiiiiiii                       Branch              imm8, cond4
        case IF_T1_M:  // T1_M    .....iiiiiiiiiii                       Branch              imm11
        case IF_T2_J1: // T2_J1   .....Scccciiiiii ..j.jiiiiiiiiiii      Branch              imm20, cond4
        case IF_T2_J2: // T2_J2   .....Siiiiiiiiii ..j.jiiiiiiiiii.      Branch              imm24
        case IF_T2_N1: // T2_N    .....i......iiii .iiiddddiiiiiiii       R1                 imm16
        case IF_LARGEJMP:
            assert(id->idGCref() == GCT_NONE);
            assert(id->idIsBound());

            dst = emitOutputLJ(ig, dst, id);
            sz  = sizeof(instrDescJmp);
            break;

        case IF_T1_D1: // T1_D1   .........mmmm...                       R1*

            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT1_M4(id->idReg1());
            dst += emitOutput_Thumb1Instr(dst, code);
            sz = SMALL_IDSC_SIZE;
            break;

        case IF_T1_D2: // T1_D2   .........mmmm...                                R3*

            /* Is this a "fat" call descriptor? */

            if (id->idIsLargeCall())
            {
                instrDescCGCA* idCall = (instrDescCGCA*)id;
                gcrefRegs             = idCall->idcGcrefRegs;
                byrefRegs             = idCall->idcByrefRegs;
                VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
                sz = sizeof(instrDescCGCA);
            }
            else
            {
                assert(!id->idIsLargeDsp());
                assert(!id->idIsLargeCns());

                gcrefRegs = emitDecodeCallGCregs(id);
                byrefRegs = 0;
                VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
                sz = sizeof(instrDesc);
            }

            code = emitInsCode(ins, fmt);
            code |= insEncodeRegT1_M4(id->idReg3());
            callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb1Instr(dst, code));
            dst += callInstrSize;
            goto DONE_CALL;

        case IF_T2_J3: // T2_J3   .....Siiiiiiiiii ..j.jiiiiiiiiii.      Call                imm24

            /* Is this a "fat" call descriptor? */

            if (id->idIsLargeCall())
            {
                instrDescCGCA* idCall = (instrDescCGCA*)id;
                gcrefRegs             = idCall->idcGcrefRegs;
                byrefRegs             = idCall->idcByrefRegs;
                VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
                sz = sizeof(instrDescCGCA);
            }
            else
            {
                assert(!id->idIsLargeDsp());
                assert(!id->idIsLargeCns());

                gcrefRegs = emitDecodeCallGCregs(id);
                byrefRegs = 0;
                VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
                sz = sizeof(instrDesc);
            }

            if (id->idAddr()->iiaAddr == NULL) /* a recursive call */
            {
                addr = emitCodeBlock;
            }
            else
            {
                addr = id->idAddr()->iiaAddr;
            }
            code = emitInsCode(ins, fmt);

            if (id->idIsDspReloc())
            {
                callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb2Instr(dst, code));
                dst += callInstrSize;
                if (emitComp->info.compMatchedVM)
                    emitRecordRelocation((void*)(dst - 4), addr, IMAGE_REL_BASED_THUMB_BRANCH24);
            }
            else
            {
                addr = (BYTE*)((size_t)addr & ~1); // Clear the lowest bit from target address

                /* Calculate PC relative displacement */
                ptrdiff_t disp = addr - (dst + 4);
                bool      S    = (disp < 0);
                bool      I1   = ((disp & 0x00800000) == 0);
                bool      I2   = ((disp & 0x00400000) == 0);

                if (S)
                    code |= (1 << 26); // S bit
                if (S ^ I1)
                    code |= (1 << 13); // J1 bit
                if (S ^ I2)
                    code |= (1 << 11); // J2 bit

                int immLo = (disp & 0x00000ffe) >> 1;
                int immHi = (disp & 0x003ff000) >> 12;

                code |= (immHi << 16);
                code |= immLo;

                disp = std::abs(disp);
                assert((disp & 0x00fffffe) == disp);

                callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb2Instr(dst, code));
                dst += callInstrSize;
            }

        DONE_CALL:

            /* We update the GC info before the call as the variables cannot be
               used by the call. Killing variables before the call helps with
               boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
               If we ever track aliased variables (which could be used by the
               call), we would have to keep them alive past the call. */

            emitUpdateLiveGCvars(GCvars, *dp);

#ifdef DEBUG
            // Output any delta in GC variable info, corresponding to the before-call GC var updates done above.
            if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC)
            {
                emitDispGCVarDelta();
            }
#endif // DEBUG

            // If the method returns a GC ref, mark R0 appropriately.
            if (id->idGCref() == GCT_GCREF)
                gcrefRegs |= RBM_R0;
            else if (id->idGCref() == GCT_BYREF)
                byrefRegs |= RBM_R0;

            // If the GC register set has changed, report the new set.
            if (gcrefRegs != emitThisGCrefRegs)
                emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);

            if (byrefRegs != emitThisByrefRegs)
                emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);

            // Some helper calls may be marked as not requiring GC info to be recorded.
            if ((!id->idIsNoGC()))
            {
                // On ARM, as on AMD64, we don't change the stack pointer to push/pop args.
                // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism
                // to record the call for GC info purposes.  (It might be best to use an alternate call,
                // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.)
                emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0);

                /* Do we need to record a call location for GC purposes? */

                if (!emitFullGCinfo)
                {
                    emitRecordGCcall(dst, callInstrSize);
                }
            }

            break;

            /********************************************************************/
            /*                            oops                                  */
            /********************************************************************/

        default:

#ifdef DEBUG
            printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
            assert(!"don't know how to encode this instruction");
#endif
            break;
    }

    // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref.
    // We assume here that "id->idGCref()" is not GCT_NONE only if the instruction described by "id" writes a
    // GC ref to register "id->idReg1()".  (It may, apparently, also not be GCT_NONE in other cases, such as
    // for stores, but we ignore those cases here.)
    if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register than can hold GC ref.
    {
        // If we ever generate instructions that write to multiple registers (LDM, or POP),
        // then we'd need to do more work here to ensure that changes in the status of GC refs are
        // tracked properly.
        if (emitInsMayWriteMultipleRegs(id))
        {
            // We explicitly list the multiple-destination-target instructions that we expect to
            // be emitted outside of the prolog and epilog here.
            switch (ins)
            {
                case INS_smull:
                case INS_umull:
                case INS_smlal:
                case INS_umlal:
                case INS_vmov_d2i:
                    // For each of these, idReg1() and idReg2() are the destination registers.
                    emitGCregDeadUpd(id->idReg1(), dst);
                    emitGCregDeadUpd(id->idReg2(), dst);
                    break;
                default:
                    assert(false); // We need to recognize this multi-target instruction...
            }
        }
        else
        {
            if (id->idGCref() != GCT_NONE)
            {
                emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            }
            else
            {
                // I also assume that "idReg1" is the destination register of all instructions that write to registers.
                emitGCregDeadUpd(id->idReg1(), dst);
            }
        }
    }

    // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
    // ref or overwritten one.
    if (emitInsWritesToLclVarStackLoc(id))
    {
        int       varNum = id->idAddr()->iiaLclVar.lvaVarNum();
        unsigned  ofs    = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
        regNumber regBase;
        int adr = emitComp->lvaFrameAddress(varNum, true, &regBase, ofs, /* isFloatUsage */ false); // no float GC refs
        if (id->idGCref() != GCT_NONE)
        {
            emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst DEBUG_ARG(varNum));
        }
        else
        {
            // If the type of the local is a gc ref type, update the liveness.
            var_types vt;
            if (varNum >= 0)
            {
                // "Regular" (non-spill-temp) local.
                vt = var_types(emitComp->lvaTable[varNum].lvType);
            }
            else
            {
                TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum);
                vt              = tmpDsc->tdTempType();
            }
            if (vt == TYP_REF || vt == TYP_BYREF)
                emitGCvarDeadUpd(adr + ofs, dst DEBUG_ARG(varNum));
        }
    }

#ifdef DEBUG
    /* Make sure we set the instruction descriptor size correctly */

    size_t expected = emitSizeOfInsDsc(id);
    assert(sz == expected);

    if (emitComp->opts.disAsm || emitComp->verbose)
    {
        emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
    }

    if (emitComp->compDebugBreak)
    {
        // Setting JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
        // at the beginning of this method.
        if (JitConfig.JitEmitPrintRefRegs() != 0)
        {
            printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
            printf("  emitThisGCrefRegs(0x%p)=", dspPtr(&emitThisGCrefRegs));
            printRegMaskInt(emitThisGCrefRegs);
            emitDispRegSet(emitThisGCrefRegs);
            printf("\n");
            printf("  emitThisByrefRegs(0x%p)=", dspPtr(&emitThisByrefRegs));
            printRegMaskInt(emitThisByrefRegs);
            emitDispRegSet(emitThisByrefRegs);
            printf("\n");
        }

        // For example, setting JitBreakEmitOutputInstr=a6 will break when this method is called for
        // emitting instruction a6 (i.e. IN00a6 in jitdump).
        if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
        {
            assert(!"JitBreakEmitOutputInstr reached");
        }
    }

    // Output any delta in GC info.
    if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC)
    {
        emitDispGCInfoDelta();
    }
#else
    if (emitComp->opts.disAsm)
    {
        size_t expected = emitSizeOfInsDsc(id);
        assert(sz == expected);
        emitDispIns(id, false, 0, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
    }
#endif

    /* All instructions are expected to generate code */

    assert(*dp != dst);

    *dp = dst;

    return sz;
}