in vm/jitrino/src/codegenerator/ia32/Ia32I8Lowerer.cpp [1125:1463]
void I8Lowerer::inlineDivRem64(bool wantReminder, Inst* inst)
{
assert(inst->getOpndRoles(0) & Inst::OpndRole_Def);
assert(inst->getOpndRoles(1) & Inst::OpndRole_Use);
assert(inst->getOpndRoles(2) & Inst::OpndRole_Use);
Opnd* dst = inst->getOpnd(0);
Opnd* src1 = inst->getOpnd(1);
Opnd* src2 = inst->getOpnd(2);
Opnd * res_lo = NULL, * res_hi = NULL;
Opnd * a_lo = NULL, * a_hi = NULL;
Opnd * b_lo = NULL, * b_hi = NULL;
prepareNewOpnds(dst, res_lo, res_hi);
prepareNewOpnds(src1, a_lo, a_hi);
prepareNewOpnds(src2, b_lo, b_hi);
//
TypeManager& tm = irManager->getTypeManager();
Type* int32type = tm.getInt32Type();
Opnd* edx = irManager->newOpnd(int32type);
Opnd* eax = irManager->newOpnd(int32type);
Opnd* ecx = irManager->newOpnd(int32type);
Opnd* ebx = irManager->newOpnd(int32type);
Opnd* edi = irManager->newOpnd(int32type);
Opnd* esi = irManager->newOpnd(int32type);
Opnd* temp_lo = irManager->newOpnd(int32type);
Opnd* temp_hi = irManager->newOpnd(int32type);
Opnd* zero = irManager->newImmOpnd(int32type, 0);
Opnd* one = irManager->newImmOpnd(int32type, 1);
//
newSubGFG();
Node* entryNode = getSubCfgEntryNode();
Node* check_A_SignNode = newBB();
Node* neg_A_Node = newBB();
Node* check_B_SignNode = newBB();
Node* neg_B_Node = newBB();
Node* testBigDvsrNode = newBB();
Node* testOneDivNode = newBB();
Node* simpleDivNode = newBB();
Node* oneDivNode = newBB();
Node* bigDivisorNode = newBB();
Node* storeResultNode = newBB();
//
// Here we go.
// The algorithm is based on unsigned div algo from assembly gems page
// http://www.df.lth.se/~john_e/gems/gem002a.html (also published in AMD
// optimization manual) and modified for signed arithmetics and for
// the Jitrino CG specifics.
//
//
// entryNode:
//
// Preparation - load all into 'registers'
//
Opnd* fixQuotSign = irManager->newOpnd(int32type);
Opnd* fixRemSign = irManager->newOpnd(int32type);
// Sign presumed positive
setCurrentNode(entryNode);
/* mov fixQuotSign, 0*/ newInst(Mnemonic_MOV, fixQuotSign, zero);
/* mov fixRemSign, 0*/ newInst(Mnemonic_MOV, fixRemSign, zero);
/* mov edx, a_hi*/ newInst(Mnemonic_MOV, edx, a_hi);
/* mov eax, a_lo*/ newInst(Mnemonic_MOV, eax, a_lo);
/* mov ecx, b_hi*/ newInst(Mnemonic_MOV, ecx, b_hi);
/* mov ebx, b_lo*/ newInst(Mnemonic_MOV, ebx, b_lo);
connectNodeTo(check_A_SignNode);
setCurrentNode(NULL);
entryNode = (Node*)0xDEADBEEF;
//
// check_A_SignNode:
//
// Test for signs and convert to unsigned when needed
//
setCurrentNode(check_A_SignNode);
/* test edx, edx*/ newInst(Mnemonic_TEST, edx, edx);
/* jns check_B_Sign*/ newBranch(Mnemonic_JNS, check_B_SignNode, neg_A_Node);
setCurrentNode(NULL);
check_A_SignNode = (Node*)0xDEADBEEF;
//
// neg_A:
//
//
setCurrentNode(neg_A_Node);
/* mov fixQuotSign, 1*/ newInst(Mnemonic_MOV, fixQuotSign, one);
/* mov fixRemSign, 1*/ newInst(Mnemonic_MOV, fixRemSign, one);
/* neg eax */ newInst(Mnemonic_NEG, 1, eax, eax);
/* adc edx, 0 */ newInst(Mnemonic_ADC, 1, edx, edx, zero);
/* neg edx */ newInst(Mnemonic_NEG, 1, edx, edx);
connectNodeTo(check_B_SignNode);
setCurrentNode(NULL);
neg_A_Node = (Node*)0xDEADBEEF;
//
// check_B_Sign_Node:
//
setCurrentNode(check_B_SignNode);
/* test ecx, ecx*/ newInst(Mnemonic_TEST, ecx, ecx);
/* jns testBigDvsrNode*/ newBranch(Mnemonic_JNS, testBigDvsrNode, neg_B_Node);
setCurrentNode(NULL);
check_B_SignNode = (Node*)0xDEADBEEF;
//
// neg_B_Node:
//
// When doing REM, the result's sing only depends on
// dividend's sign no need to change fixRemSign
setCurrentNode(neg_B_Node);
/* XOR fixQuotSign, 1*/ newInst(Mnemonic_XOR, 1, fixQuotSign, fixQuotSign, one);
/* neg ebx */ newInst(Mnemonic_NEG, 1, ebx, ebx);
/* adc ecx, 0 */ newInst(Mnemonic_ADC, 1, ecx, ecx, zero);
/* neg ecx */ newInst(Mnemonic_NEG, 1, ecx, ecx);
connectNodeTo(testBigDvsrNode);
setCurrentNode(NULL);
neg_B_Node = (Node*)0xDEADBEEF;
//
// testBigDvsr:
//
// divisor > 2^32-1 ?
setCurrentNode(testBigDvsrNode);
/* cmp ecx, 0 */ newInst(Mnemonic_CMP, ecx, zero);
// YES - proceed to the hard way.
// NO - fall to the simpler path
/* jnz BIG_DVSOR*/ newBranch(Mnemonic_JNZ, bigDivisorNode, testOneDivNode);
setCurrentNode(NULL);
testBigDvsrNode = (Node*)0xDEADBEEF;
//
// testOneDivNode:
//
//
//only one division needed ? (ecx = 0)
setCurrentNode(testOneDivNode);
/* cmp edx, ebx */ newInst(Mnemonic_CMP, edx, ebx);
// YES - one division sufficient
/* jb ONE_DIV */ newBranch(Mnemonic_JB, oneDivNode, simpleDivNode);
setCurrentNode(NULL);
testOneDivNode = (Node*)0xDEADBEEF;
//
// simpleDivNode:
//
setCurrentNode(simpleDivNode);
/* mov ecx, eax */ newInst(Mnemonic_MOV, ecx, eax); // save dividend-lo in ecx
/* mov eax, edx */ newInst(Mnemonic_MOV, eax, edx); // get dividend-hi
/* xor edx, edx */ newInst(Mnemonic_MOV, edx, zero); // zero extend it into edx:eax
/* div ebx */ newInst(Mnemonic_DIV, 2, edx, eax, edx, eax, ebx); // quotient-hi in eax
/* xchg eax, ecx*/ newInst(Mnemonic_XCHG, eax, ecx); //ecx = quotient-hi, eax =dividend-lo
connectNodeTo(oneDivNode);
setCurrentNode(NULL);
simpleDivNode = (Node*)0xDEADBEEF;
//
// oneDivNode:
//
setCurrentNode(oneDivNode);
/* div ebx */ newInst(Mnemonic_DIV, 2, edx, eax, edx, eax, ebx); // eax = quotient-lo
/* mov ebx, edx */ newInst(Mnemonic_MOV, ebx, edx); // ebx = remainder-lo
/* mov edx, ecx */ newInst(Mnemonic_MOV, edx, ecx); // edx = quotient-hi(quotient in edx:eax)
/* xor ecx, ecx */ newInst(Mnemonic_MOV, ecx, zero); // ecx = remainder-hi (rem. in ecx:ebx)
/* jmp saveResult*/
connectNodeTo(storeResultNode);
setCurrentNode(NULL);
oneDivNode = (Node*)0xDEADBEEF;
//
// bigDivisorNode:
//
setCurrentNode(bigDivisorNode);
/* mov temp_hi, edx */ newInst(Mnemonic_MOV, temp_hi, edx);
/* mov temp_lo, eax */ newInst(Mnemonic_MOV, temp_lo, eax); // save dividend
/* mov esi, ebx */ newInst(Mnemonic_MOV, esi, ebx);
/* mov edi, ecx */ newInst(Mnemonic_MOV, edi, ecx);
// ^^^divisor now in edi:ebx and ecx:esi
// shift both divisor and dividend right on 1 bit
/* shr edx, 1 */ newInst(Mnemonic_SHR, edx, one);
/* rcr eax, 1 */ newInst(Mnemonic_RCR, eax, one);
/* ror edi, 1 */ newInst(Mnemonic_ROR, edi, one);
/* rcr ebx, 1 */ newInst(Mnemonic_RCR, ebx, one);
//
// FIXME: workarounding WebMaker problem, which does not like
// both DEF and USE of the same arg in the same instruction.
// Replace, 'reg = BSR reg, 0' with
// temp = reg ; reg = BSR temp, 0
// Funny thing is that this only happens with BSR.
//
#if 0 // _FIXME_
/* bsr ecx, ecx */ newInst(Mnemonic_BSR, 1, ecx, ecx);
#else
Opnd* tmp = irManager->newOpnd(ecx->getType());
/* mov tmp, ecx */ newInst(Mnemonic_MOV, 1, tmp, ecx);
/* bsr ecx, tmp */ newInst(Mnemonic_BSR, 1, ecx, tmp);
#endif
// ecx = number of remaining shifts
// scale divisor and dividend down, so divisor fits
// into EBX (<2^32)
// FIXME: Currently, constraint resolver fails to assign 'ecx' operand to the
// real ECX, and this causes SpillGen failure.
// Forcing the ECX right here:
#if 0 //_FIXME_
/* shrd ebx, edi, CL*/ newInst(Mnemonic_SHRD, ebx, edi, ecx);
/* shrd eax, edx, CL*/ newInst(Mnemonic_SHRD, eax, edx, ecx);
/* shr edx, CL */ newInst(Mnemonic_SHR, edx, ecx);
#else
Opnd* trueECX = irManager->getRegOpnd(RegName_ECX);
newInst(Mnemonic_MOV, trueECX, ecx);
//~FIXME
/* shrd ebx, edi, CL*/ newInst(Mnemonic_SHRD, ebx, edi, trueECX);
/* shrd eax, edx, CL*/ newInst(Mnemonic_SHRD, eax, edx, trueECX);
/* shr edx, CL */ newInst(Mnemonic_SHR, edx, trueECX);
#endif
/* rol edi, 1 */ newInst(Mnemonic_ROL, edi, one);
// get quotient
/* div ebx */ newInst(Mnemonic_DIV, 2, edx, eax, edx, eax, ebx, NULL);
/* mov ebx, temp_lo*/ newInst(Mnemonic_MOV, ebx, temp_lo);
/* mov ecx, eax */ newInst(Mnemonic_MOV, ecx, eax);
/* imul edi, eax */ newInst(Mnemonic_IMUL, edi, eax);
/* mul esi */ newInst(Mnemonic_MUL, 2, edx, eax, eax, esi, NULL);
/* add edx, edi */ newInst(Mnemonic_ADD, 1, edx, edx, edi); // edx:eax = quotient * divisor
/* sub ebx, eax */ newInst(Mnemonic_SUB, 1, ebx, ebx, eax); // dividend-lo - (quot.*divisor)-lo
/* mov eax, ecx */ newInst(Mnemonic_MOV, eax, ecx);
/* mov ecx, temp_hi*/ newInst(Mnemonic_MOV, ecx, temp_hi); // restore dividend hi-word
/* sbb ecx, edx */ newInst(Mnemonic_SBB, 1, ecx, ecx, edx); // subtract divisor * quot. from dividend
/* sbb edx, edx */ newInst(Mnemonic_SBB, 1, edx, edx, edx); // 0 if remainder > 0, else FFFFFFFFh
/* and esi, edx */ newInst(Mnemonic_AND, 1, esi, esi, edx); // nothing to add
/* and edi, edx */ newInst(Mnemonic_AND, 1, edi, edi, edx); // back if remainder positive
/* add ebx, esi */ newInst(Mnemonic_ADD, 1, ebx, ebx, esi); // correct remainder
/* adc ecx, edi */ newInst(Mnemonic_ADC, 1, ecx, ecx, edi); // and quotient if
/* add eax, edx */ newInst(Mnemonic_ADD, 1, eax, eax, edx); // necessary
/* xor edx, edx */ newInst(Mnemonic_MOV, edx, zero); // clear hi-word of quot (eax<=FFFFFFFFh)
connectNodeTo(storeResultNode);
setCurrentNode(NULL);
bigDivisorNode = (Node*)0xDEADBEEF;
Node* testFixQuotSignNode = newBB();
Node* fixQuotSignNode = newBB();
Node* testFixRemSignNode = newBB();
Node* fixRemSignNode = newBB();
Node* finitaNode = newBB();
//
// storeResultNode:
//
Opnd* quot_lo = irManager->newOpnd(int32type);
Opnd* quot_hi = irManager->newOpnd(int32type);
Opnd* rem_lo = irManager->newOpnd(int32type);
Opnd* rem_hi = irManager->newOpnd(int32type);
setCurrentNode(storeResultNode);
/* mov rem_lo, ebx*/ newInst(Mnemonic_MOV, rem_lo, ebx);
/* mov rem_hi, ecx*/ newInst(Mnemonic_MOV, rem_hi, ecx);
/* mov quot_lo, eax*/ newInst(Mnemonic_MOV, quot_lo, eax);
/* mov quot_hi, edx*/ newInst(Mnemonic_MOV, quot_hi, edx);
connectNodeTo(testFixQuotSignNode);
setCurrentNode(NULL);
storeResultNode = (Node*)0xDEADBEEF;
//
// testFixQuotSignNode:
//
//
setCurrentNode(testFixQuotSignNode);
/* cmp fixQuotSign, 0 */ newInst(Mnemonic_CMP, fixQuotSign, zero);
/* jz testFixRemSignNode */ newBranch(Mnemonic_JZ, testFixRemSignNode, fixQuotSignNode);
setCurrentNode(NULL);
testFixQuotSignNode = (Node*)0xDEADBEEF;
//
// fixQuotSignNode:
//
setCurrentNode(fixQuotSignNode);
/* neg res_lo */ newInst(Mnemonic_NEG, 1, quot_lo, quot_lo);
/* adc res_hi, 0 */ newInst(Mnemonic_ADC, 1, quot_hi, quot_hi, zero);
/* neg res_lo */ newInst(Mnemonic_NEG, 1, quot_hi, quot_hi);
connectNodeTo(testFixRemSignNode);
setCurrentNode(NULL);
fixQuotSignNode = (Node*)0xDEADBEEF;
//
// testFixRemSignNode:
//
//
setCurrentNode(testFixRemSignNode);
/* cmp fixRemSign, 0 */ newInst(Mnemonic_CMP, fixRemSign, zero);
/* jz finitaNode */ newBranch(Mnemonic_JZ, finitaNode, fixRemSignNode);
setCurrentNode(NULL);
testFixQuotSignNode = (Node*)0xDEADBEEF;
//
// fixRemSignNode:
//
setCurrentNode(fixRemSignNode);
/* neg res_lo */ newInst(Mnemonic_NEG, 1, rem_lo, rem_lo);
/* adc res_hi, 0 */ newInst(Mnemonic_ADC, 1, rem_hi, rem_hi, zero);
/* neg res_lo */ newInst(Mnemonic_NEG, 1, rem_hi, rem_hi);
connectNodeTo(finitaNode);
setCurrentNode(NULL);
fixQuotSignNode = (Node*)0xDEADBEEF;
//
// finitaNode:
//
setCurrentNode(finitaNode);
if (wantReminder) {
/* mov res_lo, rem_lo*/ newInst(Mnemonic_MOV, res_lo, rem_lo);
/* mov res_hi, rem_hi*/ newInst(Mnemonic_MOV, res_hi, rem_hi);
}
else {
/* mov res_lo, quot_lo*/ newInst(Mnemonic_MOV, res_lo, quot_lo);
/* mov res_hi, quot_hi*/ newInst(Mnemonic_MOV, res_hi, quot_hi);
}
connectNodes(finitaNode, getSubCfgReturnNode());
setCurrentNode(NULL);
finitaNode = (Node*)0xDEADBEEF;
Node* originalInstNode = inst->getNode();
propagateSubCFG(inst);
propagateDivRemResults(inst, originalInstNode, quot_lo, quot_hi, rem_lo, rem_hi);
}