void I8Lowerer::inlineDivRem64()

in vm/jitrino/src/codegenerator/ia32/Ia32I8Lowerer.cpp [1125:1463]


void I8Lowerer::inlineDivRem64(bool wantReminder, Inst* inst)
{
    assert(inst->getOpndRoles(0) & Inst::OpndRole_Def);
    assert(inst->getOpndRoles(1) & Inst::OpndRole_Use);
    assert(inst->getOpndRoles(2) & Inst::OpndRole_Use);
    Opnd* dst = inst->getOpnd(0);
    Opnd* src1 = inst->getOpnd(1);
    Opnd* src2 = inst->getOpnd(2);
    
    Opnd * res_lo = NULL, * res_hi = NULL;
    Opnd * a_lo = NULL, * a_hi = NULL;
    Opnd * b_lo = NULL, * b_hi = NULL;

    prepareNewOpnds(dst,  res_lo, res_hi);
    prepareNewOpnds(src1, a_lo, a_hi);
    prepareNewOpnds(src2, b_lo, b_hi);

    //
    TypeManager& tm = irManager->getTypeManager();
    Type* int32type = tm.getInt32Type();
    
    Opnd* edx = irManager->newOpnd(int32type);
    Opnd* eax = irManager->newOpnd(int32type);
    Opnd* ecx = irManager->newOpnd(int32type);
    Opnd* ebx = irManager->newOpnd(int32type);
    Opnd* edi = irManager->newOpnd(int32type);
    Opnd* esi = irManager->newOpnd(int32type);

    Opnd* temp_lo = irManager->newOpnd(int32type);
    Opnd* temp_hi = irManager->newOpnd(int32type);
    Opnd* zero = irManager->newImmOpnd(int32type, 0);
    Opnd* one = irManager->newImmOpnd(int32type, 1);
    //

    newSubGFG();
    
    Node* entryNode = getSubCfgEntryNode();
    Node* check_A_SignNode = newBB();
    Node* neg_A_Node = newBB();
    Node* check_B_SignNode = newBB();
    Node* neg_B_Node = newBB();
    
    Node* testBigDvsrNode = newBB();
    Node* testOneDivNode = newBB();
    Node* simpleDivNode = newBB();
    Node* oneDivNode = newBB();
    Node* bigDivisorNode = newBB();
    Node* storeResultNode = newBB();
    
    //
    // Here we go.
    // The algorithm is based on unsigned div algo from assembly gems page
    // http://www.df.lth.se/~john_e/gems/gem002a.html (also published in AMD 
    // optimization manual)  and modified for signed arithmetics and for 
    // the Jitrino CG specifics.
    //

    //
    // entryNode:
    //
    // Preparation - load all into 'registers'
    //

    Opnd* fixQuotSign = irManager->newOpnd(int32type);
    Opnd* fixRemSign = irManager->newOpnd(int32type);
                        // Sign presumed positive
    setCurrentNode(entryNode);
    /* mov fixQuotSign, 0*/ newInst(Mnemonic_MOV, fixQuotSign, zero);
    /* mov fixRemSign, 0*/  newInst(Mnemonic_MOV, fixRemSign, zero);
    
    /* mov edx, a_hi*/  newInst(Mnemonic_MOV, edx, a_hi);
    /* mov eax, a_lo*/  newInst(Mnemonic_MOV, eax, a_lo);
    /* mov ecx, b_hi*/  newInst(Mnemonic_MOV, ecx, b_hi);
    /* mov ebx, b_lo*/  newInst(Mnemonic_MOV, ebx, b_lo);
    connectNodeTo(check_A_SignNode);
    setCurrentNode(NULL);
    entryNode = (Node*)0xDEADBEEF;

    //
    // check_A_SignNode:
    //
    // Test for signs and convert to unsigned when needed
    // 

    setCurrentNode(check_A_SignNode);
    /* test edx, edx*/ newInst(Mnemonic_TEST, edx, edx);
    /* jns  check_B_Sign*/ newBranch(Mnemonic_JNS, check_B_SignNode, neg_A_Node);
    setCurrentNode(NULL);
    check_A_SignNode = (Node*)0xDEADBEEF;

    //
    // neg_A:
    //
    //

    setCurrentNode(neg_A_Node);
    /* mov fixQuotSign, 1*/ newInst(Mnemonic_MOV, fixQuotSign, one);
    /* mov fixRemSign, 1*/  newInst(Mnemonic_MOV, fixRemSign, one);
    /* neg eax      */  newInst(Mnemonic_NEG, 1, eax, eax);
    /* adc edx, 0   */  newInst(Mnemonic_ADC, 1, edx, edx, zero);
    /* neg edx      */  newInst(Mnemonic_NEG, 1, edx, edx);
    connectNodeTo(check_B_SignNode);
    setCurrentNode(NULL);
    neg_A_Node = (Node*)0xDEADBEEF;

    //
    // check_B_Sign_Node:
    //
    setCurrentNode(check_B_SignNode);
    /* test ecx, ecx*/  newInst(Mnemonic_TEST, ecx, ecx);
    /* jns  testBigDvsrNode*/ newBranch(Mnemonic_JNS, testBigDvsrNode, neg_B_Node);
    setCurrentNode(NULL);
    check_B_SignNode = (Node*)0xDEADBEEF;

    //
    // neg_B_Node:
    //
    // When doing REM, the result's sing only depends on 
    // dividend's sign no need to change fixRemSign
    
    setCurrentNode(neg_B_Node);
    /* XOR fixQuotSign, 1*/  newInst(Mnemonic_XOR, 1, fixQuotSign, fixQuotSign, one);
    /* neg ebx      */  newInst(Mnemonic_NEG, 1, ebx, ebx);
    /* adc ecx, 0   */  newInst(Mnemonic_ADC, 1, ecx, ecx, zero);
    /* neg ecx      */  newInst(Mnemonic_NEG, 1, ecx, ecx);
    connectNodeTo(testBigDvsrNode);
    setCurrentNode(NULL);
    neg_B_Node = (Node*)0xDEADBEEF;

    //
    // testBigDvsr:
    //
                        // divisor > 2^32-1 ?
    setCurrentNode(testBigDvsrNode);
    /* cmp ecx, 0   */  newInst(Mnemonic_CMP, ecx, zero); 
                        // YES - proceed to the hard way.
                        // NO - fall to the simpler path
    /* jnz BIG_DVSOR*/  newBranch(Mnemonic_JNZ, bigDivisorNode, testOneDivNode);
    setCurrentNode(NULL);
    testBigDvsrNode = (Node*)0xDEADBEEF;

    //
    // testOneDivNode:
    //
    //
                        //only one division needed ? (ecx = 0)
    setCurrentNode(testOneDivNode);
    /* cmp edx, ebx */  newInst(Mnemonic_CMP, edx, ebx);
                        // YES - one division sufficient
    /* jb ONE_DIV  */   newBranch(Mnemonic_JB, oneDivNode, simpleDivNode);
    setCurrentNode(NULL);
    testOneDivNode = (Node*)0xDEADBEEF;
    
    //
    // simpleDivNode:
    //

    setCurrentNode(simpleDivNode);
    /* mov ecx, eax */  newInst(Mnemonic_MOV, ecx, eax);     // save dividend-lo in ecx
    /* mov eax, edx */  newInst(Mnemonic_MOV, eax, edx);     // get dividend-hi
    /* xor edx, edx */  newInst(Mnemonic_MOV, edx, zero);    // zero extend it into edx:eax
    /* div ebx      */  newInst(Mnemonic_DIV, 2, edx, eax, edx, eax, ebx); // quotient-hi in eax
    /* xchg eax, ecx*/  newInst(Mnemonic_XCHG, eax, ecx);    //ecx = quotient-hi, eax =dividend-lo
    connectNodeTo(oneDivNode);
    setCurrentNode(NULL);
    simpleDivNode = (Node*)0xDEADBEEF;

    // 
    // oneDivNode:
    //

    setCurrentNode(oneDivNode);
    /* div ebx */       newInst(Mnemonic_DIV, 2, edx, eax, edx, eax, ebx);  // eax = quotient-lo
    /* mov ebx, edx */  newInst(Mnemonic_MOV, ebx, edx);    // ebx = remainder-lo
    /* mov edx, ecx */  newInst(Mnemonic_MOV, edx, ecx);    // edx = quotient-hi(quotient in edx:eax)
    /* xor ecx, ecx */  newInst(Mnemonic_MOV, ecx, zero);   // ecx = remainder-hi (rem. in ecx:ebx)
    /* jmp saveResult*/
    connectNodeTo(storeResultNode);
    setCurrentNode(NULL);
    oneDivNode = (Node*)0xDEADBEEF;

    //
    // bigDivisorNode:
    //
    setCurrentNode(bigDivisorNode);
    /* mov temp_hi, edx */  newInst(Mnemonic_MOV, temp_hi, edx);
    /* mov temp_lo, eax */  newInst(Mnemonic_MOV, temp_lo, eax);    // save dividend
    /* mov esi, ebx     */  newInst(Mnemonic_MOV, esi, ebx);
    /* mov edi, ecx     */  newInst(Mnemonic_MOV, edi, ecx);
                            // ^^^divisor now in edi:ebx and ecx:esi

                            // shift both divisor and dividend right on 1 bit
    /* shr edx, 1       */  newInst(Mnemonic_SHR, edx, one);
    /* rcr eax, 1       */  newInst(Mnemonic_RCR, eax, one);
    /* ror edi, 1       */  newInst(Mnemonic_ROR, edi, one);
    /* rcr ebx, 1       */  newInst(Mnemonic_RCR, ebx, one);

    //
    // FIXME: workarounding WebMaker problem, which does not like 
    // both DEF and USE of the same arg in the same instruction.
    // Replace, 'reg = BSR reg, 0' with 
    // temp = reg ; reg = BSR temp, 0
    // Funny thing is that this only happens with BSR.
    // 
#if 0 // _FIXME_
    /* bsr ecx, ecx  */     newInst(Mnemonic_BSR, 1, ecx, ecx);
#else
    Opnd* tmp = irManager->newOpnd(ecx->getType());
    /* mov tmp, ecx  */     newInst(Mnemonic_MOV, 1, tmp, ecx);
    /* bsr ecx, tmp  */     newInst(Mnemonic_BSR, 1, ecx, tmp);
#endif
                            // ecx = number of remaining shifts

                            // scale divisor and dividend down, so divisor fits
                            // into EBX (<2^32)
    // FIXME: Currently, constraint resolver fails to assign 'ecx' operand to the 
    // real ECX, and this causes SpillGen failure.
    // Forcing the ECX right here:
#if 0 //_FIXME_
    /* shrd ebx, edi, CL*/  newInst(Mnemonic_SHRD, ebx, edi, ecx);
    /* shrd eax, edx, CL*/  newInst(Mnemonic_SHRD, eax, edx, ecx);
    /* shr  edx, CL   */    newInst(Mnemonic_SHR, edx, ecx);
#else
    Opnd* trueECX = irManager->getRegOpnd(RegName_ECX);
    newInst(Mnemonic_MOV, trueECX, ecx);
    //~FIXME
    /* shrd ebx, edi, CL*/  newInst(Mnemonic_SHRD, ebx, edi, trueECX);
    /* shrd eax, edx, CL*/  newInst(Mnemonic_SHRD, eax, edx, trueECX);
    /* shr  edx, CL   */    newInst(Mnemonic_SHR, edx, trueECX);
#endif
    /* rol  edi, 1    */    newInst(Mnemonic_ROL, edi, one);
                            // get quotient
    /* div    ebx     */    newInst(Mnemonic_DIV, 2, edx, eax, edx, eax, ebx, NULL);
    /* mov ebx, temp_lo*/   newInst(Mnemonic_MOV, ebx, temp_lo);
    /* mov ecx, eax    */   newInst(Mnemonic_MOV, ecx, eax);
    /* imul edi, eax   */   newInst(Mnemonic_IMUL, edi, eax);
    /* mul  esi        */   newInst(Mnemonic_MUL, 2, edx, eax, eax, esi, NULL);
    /* add edx, edi    */   newInst(Mnemonic_ADD, 1, edx, edx, edi);    // edx:eax = quotient * divisor
    /* sub ebx, eax    */   newInst(Mnemonic_SUB, 1, ebx, ebx, eax);    // dividend-lo - (quot.*divisor)-lo
    /* mov eax, ecx    */   newInst(Mnemonic_MOV, eax, ecx);
    /* mov ecx, temp_hi*/   newInst(Mnemonic_MOV, ecx, temp_hi);        // restore dividend hi-word
    /* sbb ecx, edx    */   newInst(Mnemonic_SBB, 1, ecx, ecx, edx);    // subtract divisor * quot. from dividend
    /* sbb edx, edx    */   newInst(Mnemonic_SBB, 1, edx, edx, edx);    // 0 if remainder > 0, else FFFFFFFFh
    /* and esi, edx    */   newInst(Mnemonic_AND, 1, esi, esi, edx);    // nothing to add
    /* and edi, edx    */   newInst(Mnemonic_AND, 1, edi, edi, edx);    // back if remainder positive
    /* add ebx, esi    */   newInst(Mnemonic_ADD, 1, ebx, ebx, esi);    // correct remainder
    /* adc ecx, edi    */   newInst(Mnemonic_ADC, 1, ecx, ecx, edi);    // and quotient if
    /* add eax, edx    */   newInst(Mnemonic_ADD, 1, eax, eax, edx);    // necessary
    /* xor edx, edx    */   newInst(Mnemonic_MOV, edx, zero);           // clear hi-word of quot (eax<=FFFFFFFFh)
    
    connectNodeTo(storeResultNode);
    setCurrentNode(NULL);
    bigDivisorNode = (Node*)0xDEADBEEF;

    Node* testFixQuotSignNode = newBB();
    Node* fixQuotSignNode = newBB();
    Node* testFixRemSignNode = newBB();
    Node* fixRemSignNode = newBB();
    Node* finitaNode = newBB();

    //
    // storeResultNode:
    // 
    Opnd* quot_lo = irManager->newOpnd(int32type);
    Opnd* quot_hi = irManager->newOpnd(int32type);
    Opnd* rem_lo = irManager->newOpnd(int32type);
    Opnd* rem_hi = irManager->newOpnd(int32type);
    
    setCurrentNode(storeResultNode);
    /* mov rem_lo, ebx*/    newInst(Mnemonic_MOV, rem_lo, ebx);
    /* mov rem_hi, ecx*/    newInst(Mnemonic_MOV, rem_hi, ecx);
    /* mov quot_lo, eax*/   newInst(Mnemonic_MOV, quot_lo, eax);
    /* mov quot_hi, edx*/   newInst(Mnemonic_MOV, quot_hi, edx);
    connectNodeTo(testFixQuotSignNode);
    setCurrentNode(NULL);
    storeResultNode = (Node*)0xDEADBEEF;

    //
    // testFixQuotSignNode:
    //
    //
    setCurrentNode(testFixQuotSignNode);
    /* cmp fixQuotSign, 0  */   newInst(Mnemonic_CMP, fixQuotSign, zero);
    /* jz testFixRemSignNode */ newBranch(Mnemonic_JZ, testFixRemSignNode, fixQuotSignNode);
    setCurrentNode(NULL);
    testFixQuotSignNode = (Node*)0xDEADBEEF;

    //
    // fixQuotSignNode:
    //
    setCurrentNode(fixQuotSignNode);
    /* neg res_lo      */  newInst(Mnemonic_NEG, 1, quot_lo, quot_lo);
    /* adc res_hi, 0   */  newInst(Mnemonic_ADC, 1, quot_hi, quot_hi, zero);
    /* neg res_lo      */  newInst(Mnemonic_NEG, 1, quot_hi, quot_hi);
    connectNodeTo(testFixRemSignNode);
    setCurrentNode(NULL);
    fixQuotSignNode = (Node*)0xDEADBEEF;

    //
    // testFixRemSignNode:
    //
    //
    setCurrentNode(testFixRemSignNode);
    /* cmp fixRemSign, 0  */    newInst(Mnemonic_CMP, fixRemSign, zero);
    /* jz finitaNode */         newBranch(Mnemonic_JZ, finitaNode, fixRemSignNode);
    setCurrentNode(NULL);
    testFixQuotSignNode = (Node*)0xDEADBEEF;

    //
    // fixRemSignNode:
    //
    setCurrentNode(fixRemSignNode);
    /* neg res_lo      */  newInst(Mnemonic_NEG, 1, rem_lo, rem_lo);
    /* adc res_hi, 0   */  newInst(Mnemonic_ADC, 1, rem_hi, rem_hi, zero);
    /* neg res_lo      */  newInst(Mnemonic_NEG, 1, rem_hi, rem_hi);
    connectNodeTo(finitaNode);
    setCurrentNode(NULL);
    fixQuotSignNode = (Node*)0xDEADBEEF;

    //
    // finitaNode:
    //
    setCurrentNode(finitaNode);
    if (wantReminder) {
        /* mov res_lo, rem_lo*/  newInst(Mnemonic_MOV, res_lo, rem_lo);
        /* mov res_hi, rem_hi*/  newInst(Mnemonic_MOV, res_hi, rem_hi);
    }
    else {
        /* mov res_lo, quot_lo*/  newInst(Mnemonic_MOV, res_lo, quot_lo);
        /* mov res_hi, quot_hi*/  newInst(Mnemonic_MOV, res_hi, quot_hi);
    }
    connectNodes(finitaNode, getSubCfgReturnNode());
    setCurrentNode(NULL);
    finitaNode = (Node*)0xDEADBEEF;

    Node* originalInstNode = inst->getNode();
    propagateSubCFG(inst);
    propagateDivRemResults(inst, originalInstNode, quot_lo, quot_hi, rem_lo, rem_hi);
}