vm/jitrino/src/optimizer/multiplybyconstant.cpp (904 lines of code) (raw):

/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @author Intel, Pavel A. Ozhdikhin * */ #undef STANDALONE_TEST #include <iostream> #include <fstream> #include <float.h> #include <cstdlib> #include <algorithm> #ifdef STANDALONE_TEST #include "standalonetest2.h" #else #include "Opcode.h" #include "Opnd.h" #include "Type.h" #include "Inst.h" #include "IRBuilder.h" #include "BitSet.h" #include "Log.h" #include "optimizer.h" #include "simplifier.h" #include "constantfolder.h" #include "deadcodeeliminator.h" #include "optarithmetic.h" #include "irmanager.h" #include "CompilationContext.h" #include <float.h> #include <math.h> #include "Stl.h" #include "simplifier.h" #include "optarithmetic.h" namespace Jitrino { #ifndef DEBUG_MULTIPLYBYCONSTANT #define DEBUGPRINT(x) #define DEBUGPRINT2(x,y) #else void indent(int indentby) { for (int i=0; i<indentby; i++) { Log::out() << " "; } } #define DEBUGPRINT(x) indent(2*depth); Log::out() << x << ::std::endl #define DEBUGPRINT2(x,y) indent(2*depth); Log::out() << x; y.print(Log::out()); Log::out() << ::std::endl #endif #endif struct MulOp { enum Op { pushc=0, pushy=1, swap=2, dup=3, shladd=4, add=5, sub=6, neg=7, shiftl=8, dup2, dup3, numMulOps=11 }; }; const int methodLen = 400; const int stackDepth = 400; const int numMulOpnds[MulOp::numMulOps] = { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0 }; const int COST_LOADZERO = 0; const int COST_LOADY = 0; const int COST_NEG = 1; const int COST_ADD = 1; const int COST_SUB = 1; class MulMethod { private: public: int data[methodLen]; int len; bool foripf; int COST_ADDSHIFT_SMALL; int COST_SHIFT; int SMALL_SHIFT_MAXBITS; MulMethod(bool ipf) : len(0), foripf(ipf) { if (ipf) { COST_ADDSHIFT_SMALL = 1; COST_SHIFT = 1; SMALL_SHIFT_MAXBITS = 4; } else { COST_ADDSHIFT_SMALL = 2; COST_SHIFT = 4; SMALL_SHIFT_MAXBITS = 3; } }; void append(MulOp::Op op0) { assert((0<=op0)&&(op0<MulOp::numMulOps)); assert(numMulOpnds[op0]==0); data[len] = op0; len += 1; }; void append(MulOp::Op op0op, int opnd0) { int op0 = (int) op0op; assert((0<=op0)&&(op0<MulOp::numMulOps)); assert(numMulOpnds[op0]==1); data[len] = op0; data[len+1] = opnd0; len += 2; }; void makespace(int k) { assert(len+k <= methodLen); assert(k>0); for (int i=len+k; i>k; i--) { data[i] = data[i-k]; } len += k; }; void prepend(MulOp::Op op0) { assert((0<=op0)&&(op0<MulOp::numMulOps)); assert(numMulOpnds[op0]==0); makespace(1); data[0] = op0; }; void prepend(MulOp::Op op0, int opnd0) { assert((0<=op0)&&(op0<MulOp::numMulOps)); assert(numMulOpnds[op0]==1); makespace(2); data[0] = op0; data[1] = opnd0; }; void append(const MulMethod &other) { assert(len+other.len <= methodLen); for (int i=0; i < other.len; i++) { data[len+i] = other.data[i]; } len += other.len; } void prepend(const MulMethod &other) { makespace(other.len); for (int i=0; i < other.len; i++) { data[i] = other.data[i]; } } int apply(int y, int *latency = 0) { int thestack[stackDepth]; int when[stackDepth]; thestack[0] = 0xdeadbeef; when[0] = -1; int ip = 0; int sp = 0; while (ip < len) { enum MulOp::Op op = (enum MulOp::Op) data[ip]; ip += 1; assert(sp < stackDepth); switch (op) { case MulOp::pushc: assert(ip<len); thestack[sp] = data[ip++]; when[sp] = 0; sp += 1; break; case MulOp::pushy: thestack[sp] = y; when[sp] = COST_LOADY; sp += 1; break; case MulOp::swap: assert(sp >= 2); ::std::swap(thestack[sp-1], thestack[sp-2]); ::std::swap(when[sp-1], when[sp-2]); break; case MulOp::dup: assert(sp >= 1); thestack[sp] = thestack[sp-1]; when[sp] = when[sp-1]; sp += 1; break; case MulOp::dup2: assert(sp >= 2); thestack[sp] = thestack[sp-2]; when[sp] = when[sp-2]; sp += 1; break; case MulOp::dup3: assert(sp >= 2); thestack[sp] = thestack[sp-3]; when[sp] = when[sp-3]; sp += 1; break; case MulOp::add: assert(sp >= 2); thestack[sp-2] = thestack[sp-2] + thestack[sp-1]; when[sp-2] = ::std::max(when[sp-2],when[sp-1]) + COST_ADD; sp -=1; break; case MulOp::sub: assert(sp >= 2); thestack[sp-2] = thestack[sp-2] - thestack[sp-1]; when[sp-2] = ::std::max(when[sp-2],when[sp-1]) + COST_SUB; sp -=1; break; case MulOp::shladd: assert(sp >= 2); assert(ip<len); assert(data[ip] <= SMALL_SHIFT_MAXBITS); thestack[sp-2] = (thestack[sp-2] << data[ip++]) + thestack[sp-1]; when[sp-2] = ::std::max(when[sp-2], when[sp-1]) + COST_ADDSHIFT_SMALL; sp -=1; break; case MulOp::neg: assert(sp >= 1); thestack[sp-1] = -thestack[sp-1]; when[sp-1] = when[sp-1] + COST_NEG; break; case MulOp::shiftl: assert(sp >= 1); assert(ip<len); thestack[sp-1] = thestack[sp-1] << data[ip++]; when[sp-1] = when[sp-1]+ COST_SHIFT; break; default: assert(0); } } if (sp != 1) { ::std::cerr << ::std::endl; ::std::cerr << "sp != 1 after applying: "; printOps(::std::cerr); ::std::cerr << ::std::endl; assert(0); } if (latency) { *latency = when[0]; } return thestack[0]; }; void printOps(::std::ostream &outs) const { int ip = 0; while (ip < len) { enum MulOp::Op op = (enum MulOp::Op) data[ip]; ip += 1; switch (op) { case MulOp::pushc: outs << "push " << data[ip++] << "; "; break; case MulOp::pushy: outs << "push y; "; break; case MulOp::swap: outs << "swap; "; break; case MulOp::dup: outs << "dup; "; break; case MulOp::dup2: outs << "dup2; "; break; case MulOp::dup3: outs << "dup3; "; break; case MulOp::add: outs << "add; "; break; case MulOp::sub: outs << "sub; "; break; case MulOp::shladd: outs << "shladd " << data[ip++] << "; "; break; case MulOp::neg: outs << "neg; "; break; case MulOp::shiftl: outs << "shiftl " << data[ip++] << "; "; break; default: break; } } } enum What { IsConstant, IsY, IsSub }; static void printSub(::std::ostream &outs, int onstack, enum What what) { switch (what) { case IsConstant: outs << onstack; break; case IsY: outs << "Y"; break; case IsSub: outs << "t" << onstack; break; default: assert(0); } } void print(::std::ostream &outs) const { int thestack[stackDepth]; What what[stackDepth]; thestack[0] = 0xdeadbeef; what[0] = IsConstant; int ip = 0; int sp = 0; int subnum = 0; if (len == 0) return; // no reduction while (ip < len) { enum MulOp::Op op = (enum MulOp::Op) data[ip]; ip += 1; assert(sp < stackDepth); switch (op) { case MulOp::pushc: assert(ip<len); thestack[sp] = data[ip++]; what[sp] = IsConstant; sp += 1; break; case MulOp::pushy: thestack[sp] = 0; what[sp] = IsY; sp += 1; break; case MulOp::swap: assert(sp >= 2); ::std::swap(thestack[sp-1], thestack[sp-2]); ::std::swap(what[sp-1], what[sp-2]); break; case MulOp::dup: assert(sp >= 1); thestack[sp] = thestack[sp-1]; what[sp] = what[sp-1]; sp += 1; break; case MulOp::dup2: assert(sp >= 2); thestack[sp] = thestack[sp-2]; what[sp] = what[sp-2]; sp += 1; break; case MulOp::dup3: assert(sp >= 2); thestack[sp] = thestack[sp-3]; what[sp] = what[sp-3]; sp += 1; break; case MulOp::add: { assert(sp >= 2); int thissub = ++subnum; outs << "t" << thissub << " = add "; printSub(outs, thestack[sp-2], what[sp-2]); outs << ", "; printSub(outs, thestack[sp-1], what[sp-1]); outs << "; "; thestack[sp-2] = thissub; what[sp-2] = IsSub; sp -=1; break; } case MulOp::sub: { assert(sp >= 2); int thissub = ++subnum; outs << "t" << thissub << " = sub "; printSub(outs, thestack[sp-2], what[sp-2]); outs << ", "; printSub(outs, thestack[sp-1], what[sp-1]); outs << "; "; thestack[sp-2] = thissub; what[sp-2] = IsSub; sp -=1; break; } case MulOp::shladd: { assert(sp >= 2); assert(ip<len); assert(data[ip] <= SMALL_SHIFT_MAXBITS); int thissub = ++subnum; outs << "t" << thissub << " = shladd "; printSub(outs, thestack[sp-2], what[sp-2]); outs << ", " << data[ip++] << ", "; printSub(outs, thestack[sp-1], what[sp-1]); outs << "; "; thestack[sp-2] = thissub; what[sp-2] = IsSub; sp -=1; break; } case MulOp::neg: { assert(sp >= 1); int thissub = ++subnum; outs << "t" << thissub << " = neg "; printSub(outs, thestack[sp-1], what[sp-1]); outs << "; "; thestack[sp-1] = thissub; what[sp-1] = IsSub; break; } case MulOp::shiftl: { assert(sp >= 1); assert(ip<len); int thissub = ++subnum; outs << "t" << thissub << " = shiftl "; printSub(outs, thestack[sp-1], what[sp-1]); outs << ", " << data[ip++] << "; "; thestack[sp-1] = thissub; what[sp-1] = IsSub; break; } default: assert(0); } } if (sp != 1) { ::std::cerr << ::std::endl; ::std::cerr << "sp != 1 after applying: "; printOps(::std::cerr); ::std::cerr << ::std::endl; assert(0); } outs << "r = "; printSub(outs, thestack[sp-1], what[sp-1]); }; int getCost() { if (len == 0) { return 10000; } else { int latency; apply(13, &latency); return latency; } } #ifndef STANDALONE_TEST Opnd *genCode(Type *type, Simplifier *simp, Opnd *y) { Opnd *thestack[stackDepth]; Type::Tag tag = y->getType()->tag; bool width32 = ((tag == Type::Int32) || (tag == Type::UInt32)); thestack[0] = 0; int ip = 0; int sp = 0; if (len == 0) return NULL; // no reduction while (ip < len) { enum MulOp::Op op = (enum MulOp::Op) data[ip]; ip += 1; assert(sp < stackDepth); switch (op) { case MulOp::pushc: { assert(ip<len); int val = data[ip]; ip += 1; Opnd *op = (width32 ? simp->genLdConstant((I_32)(val))->getDst() : simp->genLdConstant((int64)(val))->getDst()); thestack[sp] = op; sp += 1; } break; case MulOp::pushy: thestack[sp] = y; sp += 1; break; case MulOp::swap: assert(sp >= 2); ::std::swap(thestack[sp-1], thestack[sp-2]); break; case MulOp::dup: assert(sp >= 1); thestack[sp] = thestack[sp-1]; sp += 1; break; case MulOp::dup2: assert(sp >= 2); thestack[sp] = thestack[sp-2]; sp += 1; break; case MulOp::dup3: assert(sp >= 2); thestack[sp] = thestack[sp-3]; sp += 1; break; case MulOp::add: assert(sp >= 2); thestack[sp-2] = simp->genAdd(type, Modifier(Overflow_None)|Modifier(Exception_Never)|Modifier(Strict_No), thestack[sp-2], thestack[sp-1])->getDst(); sp -=1; break; case MulOp::sub: assert(sp >= 2); thestack[sp-2] = simp->genSub(type, Modifier(Overflow_None)|Modifier(Exception_Never)|Modifier(Strict_No), thestack[sp-2], thestack[sp-1])->getDst(); sp -=1; break; case MulOp::shladd: { assert(sp >= 2); assert(ip<len); int val = data[ip]; ip += 1; assert(val <= SMALL_SHIFT_MAXBITS); Opnd *op = simp->genLdConstant((I_32)(val))->getDst(); thestack[sp-2] = simp->genShladd(type, thestack[sp-2], op, thestack[sp-1])->getDst(); sp -=1; } break; case MulOp::neg: assert(sp >= 1); thestack[sp-1] = simp->genNeg(type, thestack[sp-1])->getDst(); break; case MulOp::shiftl: { assert(sp >= 1); assert(ip<len); int val = data[ip]; ip += 1; Opnd *op = simp->genLdConstant((I_32)(val))->getDst(); thestack[sp-1] = simp->genShl(type, ShiftMask_None, thestack[sp-1], op)->getDst(); } break; default: assert(0); } } assert(sp == 1); return thestack[0]; } #endif // !STANDALONE_TEST }; template <typename inttype, int width> bool testMul(MulMethod &method, inttype d, inttype num); template <typename inttype, int width> void planMulCompound(MulMethod &m, inttype d, int depth); template <typename inttype, int width> void planMulLookup(MulMethod &m, inttype d, int depth); template <typename inttype, int width> void planMulFactor(MulMethod &m, inttype d, int depth); template <typename inttype, int width> void planMulLarge(MulMethod &m, inttype d, int depth); template <typename inttype, int width> void planMulNeg(MulMethod &m, inttype d, int depth); template <typename inttype, int width> void planMul(MulMethod &m, inttype d, int depth); template <typename inttype, int width> void planMul(MulMethod &m, inttype d, int depth) { DEBUGPRINT("planMul(" << (int)d << ")"); if (d == -1) { m.append(MulOp::pushy); m.append(MulOp::neg); } else if (d == 1) { m.append(MulOp::pushy); } else if (d == 0) { m.append(MulOp::pushc, 0); } else if (d == -d) { int k = width - 1; m.append(MulOp::pushy); m.append(MulOp::shiftl, k); } else { /* ONLY DO REDUCTION FOR THE TWO CASES */ if ((d < 32) && (d > 0)) { planMulLookup<inttype, width>(m, d, depth+1); } else if (isPowerOf2(d)) { int k = whichPowerOf2<inttype, width>(d); m.append(MulOp::pushy); m.append(MulOp::shiftl, k); } /* if (d < 0) { planMulNeg<inttype, width>(m, d, depth); } else if (d < 32) { planMulLookup<inttype, width>(m, d, depth+1); } else if (isPowerOf2(d)) { int k = whichPowerOf2<inttype, width>(d); m.append(MulOp::pushy); m.append(MulOp::shiftl, k); } else { planMulLarge<inttype, width>(m, d, depth+1); } */ } #ifdef TEST_OUTPUT DEBUGPRINT2("planMul(" << d << ")", res); for (int i=0; i<10000; i += 17) { inttype num = i; if (!(testMul<inttype, width>(m, res, d, num) && testMul<inttype, width>(m, res, d, -num) && testMul<inttype, width>(m, res, d, 0x7fffffff+num) && testMul<inttype, width>(m, res, d, 0x7fffffff-num))) { return; } } #endif } template <typename inttype, int width> void planMulNeg(MulMethod &m, inttype d, int depth) { DEBUGPRINT("planMulNeg(" << (int)d << ")"); if (depth == 1) { // this seems to win by 2 cycles only in about 2/25000 cases // and by 1 cycle only in about 1/50 cases DEBUGPRINT("planMulNeg(" << (int)d << ") case 1"); MulMethod res1(m.foripf); planMul<inttype, width>(res1, -d, depth+1); res1.append(MulOp::neg); // this seems to win by 2 cycles in about 1/250 cases // and by 1 cycle in 1/10 cases // but that's not enough to justify a search DEBUGPRINT("planMulNeg(" << (int) d << ") case 2"); MulMethod res2(m.foripf); inttype dinv = ~d; int numones = nlz<inttype, width>(dinv); int shiftby = width - numones; inttype newd = d + ((inttype)1 << shiftby); if (shiftby <= m.SMALL_SHIFT_MAXBITS) { DEBUGPRINT("planMulNeg(" << (int) d << ") case 2a, shiftby=" << shiftby); res2.append(MulOp::pushy); res2.append(MulOp::neg); planMul<inttype, width>(res2, newd, depth+1); res2.append(MulOp::shladd, shiftby); } else { DEBUGPRINT("planMulNeg(" << (int) d << ") case 2b, shiftby=" << shiftby); planMul<inttype, width>(res2, newd, depth+1); res2.append(MulOp::pushy); res2.append(MulOp::shiftl, shiftby); res2.append(MulOp::sub); } DEBUGPRINT("planMulNeg(" << (int) d << ") case 3"); MulMethod res3(m.foripf); planMulCompound<inttype, width>(res3, d, depth+1); int latency1 = res1.getCost(); int latency2 = res2.getCost(); int latency3 = res3.getCost(); if ((latency1 <= latency2) && (latency1 <= latency3)) { if (latency2 < latency3) { DEBUGPRINT("choose neg method 1 by " << latency2-latency1 << " over 2"); } else if (latency3 < latency2) { DEBUGPRINT("choose neg method 1 by " << latency3-latency1 << " over 3"); } else { DEBUGPRINT("choose neg method 1 by " << latency2-latency1 << " over 2 & 3"); } m.append(res1); } else if (latency2 < latency3) { if (latency1 < latency3) { DEBUGPRINT("choose neg method 2 by " << latency1 - latency2 << " over 1"); } if (latency1 > latency3) { DEBUGPRINT("choose neg method 2 by " << latency3 - latency2 << " over 3"); } else { DEBUGPRINT("choose neg method 2 by " << latency3 - latency2 << " over 1 & 3"); } m.append(res2); } else { if (latency1 < latency2) { DEBUGPRINT("choose neg method 3 by " << latency1 - latency3 << " over 1"); } if (latency1 > latency2) { DEBUGPRINT("choose neg method 3 by " << latency2 - latency3 << " over 2"); } else { DEBUGPRINT("choose neg method 3 by " << latency2 - latency3 << " over 1 & 2"); } m.append(res3); } } else { DEBUGPRINT("planMulNeg(" << (int) d << ") case 3a"); planMulCompound<inttype, width>(m, d, depth+1); } DEBUGPRINT("done planMulNeg(" << (int) d << ")"); } template <typename inttype, int width> void planMulFactor(MulMethod &m, inttype d, int depth) { DEBUGPRINT("planMulFactor(" << (int) d << ")"); MulMethod res2(m.foripf); int shiftby = m.SMALL_SHIFT_MAXBITS; inttype factor = (1 << shiftby) + 1; // small factors while (factor > 2) { if ((d % factor) == 0) { DEBUGPRINT("planMulFactor(" << (int) d << ") found factor " << (int) factor); MulMethod res1(m.foripf); inttype quot = d / factor; planMul<inttype, width>(res1, quot, depth+1); res1.append(MulOp::dup); res1.append(MulOp::shladd, shiftby); m.append(res1); return; } factor = (factor >> 1) + 1; shiftby -= 1; } // bigger factors shiftby = width - 2; factor = (1 << shiftby) + 1; while (factor > (1 << m.SMALL_SHIFT_MAXBITS) + 1) { if ((d % factor) == 0) { DEBUGPRINT("planMulFactor(" << (int) d << ") found factor " << (int) factor); MulMethod res1(m.foripf); inttype quot = d / factor; planMul<inttype, width>(res1, quot, depth+1); res1.append(MulOp::dup); res1.append(MulOp::shiftl, shiftby); res1.append(MulOp::add); m.append(res1); return; } factor = (factor >> 1) + 1; shiftby -= 1; } // bigger factors shiftby = width - 2; factor = (1 << (int) shiftby) - 1; while (factor > 3) { if ((d % factor) == 0) { MulMethod res1(m.foripf); if (d > 0) { DEBUGPRINT("planMulFactor(" << (int) d << ") found factor " << (int) factor); inttype quot = d / factor; planMul<inttype, width>(res1, quot, depth+1); res1.append(MulOp::dup); res1.append(MulOp::shiftl, shiftby); res1.append(MulOp::swap); res1.append(MulOp::sub); m.append(res1); return; } else { DEBUGPRINT("planMulFactor(" << (int) d << ") found factor " << (int) -factor); inttype quot = -d / factor; planMul<inttype, width>(res1, quot, depth+1); res1.append(MulOp::dup); res1.append(MulOp::shiftl, shiftby); res1.append(MulOp::sub); m.append(res1); return; } } factor = ((factor+1) >> 1) - 1; shiftby -= 1; } DEBUGPRINT("done planMulFactor(" << (int) d << ")"); m.append(res2); } template <typename inttype, int width> void planMulLarge(MulMethod &m, inttype d, int depth) { if (depth < 6) { MulMethod res1(m.foripf); planMulCompound<inttype, width>(res1, d, depth); MulMethod res2(m.foripf); planMulFactor<inttype, width>(res2, d, depth); int latency1 = res1.getCost(); int latency2 = res2.getCost(); if (latency1 <= latency2) { DEBUGPRINT("choose large method 1: compound"); m.append(res1); } else { DEBUGPRINT("choose large method 2: factor"); m.append(res2); } } else { planMulCompound<inttype, width>(m, d, depth); } } template <typename inttype, int width> void planMulCompound(MulMethod &m, inttype d, int depth) { assert((d < 0) || (d > 31)); DEBUGPRINT("planMulCompound(" << (int) d << ")"); inttype delta = d ^ (d >> 1); // bits differing from from bit to left int numChanges = popcount<inttype, width>(delta); int deltaright = ntz<inttype, width>(delta); int deltaleft = nlz<inttype, width>(delta); int bitswidth = width - deltaright - deltaleft - 1; int small_shift_maxbits = m.SMALL_SHIFT_MAXBITS; if ((numChanges < 4) || (bitswidth <= 5)) { // small enough to worry about sign/rightbit issues DEBUGPRINT("case few changes"); // we assume we've dealt with <31 elsewhere, so we must have some shifting ahead of us. if ((d & 1) == 1) { inttype dinv = ~d; DEBUGPRINT("case odd"); int rightones = ntz<inttype, width>(dinv); if (rightones == 1) { inttype dsub1 = d-1; int dsub1rightzeros = ntz<inttype, width>(dsub1); if (dsub1rightzeros > small_shift_maxbits) { planMul<inttype, width>(m, dsub1 >> small_shift_maxbits, depth+1); m.append(MulOp::pushy); m.append(MulOp::shladd, small_shift_maxbits); } else { planMul<inttype, width>(m, dsub1 >> dsub1rightzeros, depth+1); m.append(MulOp::pushy); m.append(MulOp::shladd, dsub1rightzeros); } } else if (rightones <= small_shift_maxbits) { planMul<inttype, width>(m, (d+1)>>rightones, depth+1); m.append(MulOp::pushy); m.append(MulOp::neg); m.append(MulOp::shladd, rightones); } else { planMul<inttype, width>(m, (d+1)>>small_shift_maxbits, depth+1); m.append(MulOp::pushy); m.append(MulOp::neg); m.append(MulOp::shladd, small_shift_maxbits); } } else { DEBUGPRINT("case even"); int rightzeros = deltaright+1; assert((rightzeros == ntz<inttype, width>(d))); if (bitswidth <= 5) { DEBUGPRINT("case narrow"); if ((rightzeros <= small_shift_maxbits) && (rightzeros + bitswidth >= 2 * small_shift_maxbits)) { DEBUGPRINT("case smallshiftadd"); inttype newd = d + ((inttype)1 << rightzeros); m.append(MulOp::pushy); m.append(MulOp::neg); planMul<inttype, width>(m, newd, depth+1); m.append(MulOp::shladd, rightzeros); } else { inttype newd = d >> rightzeros; DEBUGPRINT("case smallshift"); planMul<inttype, width>(m, newd, depth+1); m.append(MulOp::shiftl, rightzeros); } } else { DEBUGPRINT("case even, wider"); inttype deltasubright = delta - ((inttype)1 << deltaright); int deltasubright_ntz = ntz<inttype, width>(deltasubright); int region2 = deltasubright_ntz - deltaright +1; assert(region2 > 0); if (region2 > 2) { inttype newd = d + ((inttype)1 << rightzeros); if (rightzeros <= small_shift_maxbits) { DEBUGPRINT("case wide smalladdshift"); m.append(MulOp::pushy); m.append(MulOp::neg); planMul<inttype, width>(m, newd, depth+1); m.append(MulOp::shladd, rightzeros); } else { DEBUGPRINT("case wide sub of shift"); planMul<inttype, width>(m, newd, depth+1); m.append(MulOp::pushy); m.append(MulOp::shiftl, rightzeros); m.append(MulOp::sub); } } else if (region2 == 2) { inttype newd = d - (3 << rightzeros); DEBUGPRINT("case wide addshift3"); if (rightzeros <= small_shift_maxbits) { m.append(MulOp::pushy); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); planMul<inttype, width>(m, newd, depth+1); m.append(MulOp::shladd, rightzeros); } else { DEBUGPRINT("case wide add shift3"); planMul<inttype, width>(m, newd >> small_shift_maxbits, depth+1); m.append(MulOp::pushy); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); m.append(MulOp::shiftl, rightzeros); m.append(MulOp::shladd, small_shift_maxbits); } } else { // region2 == 1 inttype newd = d - ((inttype)1 << rightzeros); if (rightzeros <= small_shift_maxbits) { DEBUGPRINT("case wide smalladdshift1"); m.append(MulOp::pushy); planMul<inttype, width>(m, newd, depth+1); m.append(MulOp::shladd, rightzeros); } else { DEBUGPRINT("case wide sub of shift"); planMul<inttype, width>(m, newd >> small_shift_maxbits, depth+1); m.append(MulOp::pushy); m.append(MulOp::shiftl, rightzeros); m.append(MulOp::shladd, small_shift_maxbits); } } } } } else { // bitswidth > 5 assert(numChanges >= 2); DEBUGPRINT("case wide"); if (numChanges == 2) { DEBUGPRINT("case numChanges==2"); if ((numChanges&1)==0) { DEBUGPRINT("case wide changes=2 even"); int rightzeros = deltaright+1; int leftzeros = deltaleft; assert((rightzeros == ntz<inttype, width>(d))); assert((leftzeros == nlz<inttype, width>(d))); if (rightzeros <= small_shift_maxbits) { DEBUGPRINT("case wide smallshiftadd"); assert(rightzeros+bitswidth >= 2 * small_shift_maxbits); m.append(MulOp::pushy); m.append(MulOp::neg); m.append(MulOp::pushy); m.append(MulOp::shiftl, width - leftzeros); m.append(MulOp::shladd, rightzeros); } else { DEBUGPRINT("case wide narrow sub"); m.append(MulOp::pushy); m.append(MulOp::shiftl, width - leftzeros); m.append(MulOp::pushy); m.append(MulOp::shiftl, rightzeros); m.append(MulOp::sub); } } else { DEBUGPRINT("case wide changes=2 odd"); } } else { // first try: use point width/2 to split number DEBUGPRINT("case numChanges>2"); int rightzeros = ((d & 1)!=0) ? 0 : deltaright+1; #ifndef NDEBUG int leftzeros = ((d & ((inttype)1<<(width-1))) != 0) ? 0 : deltaleft; #endif assert((rightzeros == ntz<inttype, width>(d))); assert((leftzeros == nlz<inttype, width>(d))); int k = (numChanges + 1) >> 2; assert(k > 0); // we had numChanges>=3 above. inttype stripright = d; while (k > 0) { // turn rightmost group of 0s into 1s, no effect if none inttype striprightzeros = stripright | (stripright - 1); // turn rightmost group of 1s into 0s, must be some inttype srzinv = ~striprightzeros; // first invert inttype sroinv = srzinv | (srzinv - 1); // strip 0s stripright = ~sroinv; // invert back k -= 1; } // now stripright is d but with 0s from about the middle // of changes to the right // find a point one place to the left of the set of 0s int zeropoint = ntz<inttype, width>(stripright); inttype zerobit = (inttype(1))<<zeropoint; assert((d & zerobit) != 0); assert((d & (zerobit>>1)) == 0); DEBUGPRINT("zerobit is " << (int) zerobit); DEBUGPRINT("zeropoint is " << (int) zeropoint); inttype rightmask = zerobit - 1; inttype leftmask = ~rightmask; DEBUGPRINT("leftmask is " << (int) leftmask << ", rightmask is " << (int) rightmask); inttype rightd = d & rightmask; inttype leftd = d & leftmask; DEBUGPRINT("leftd is " << (int) leftd << ", rightd is " << (int) rightd); int leftdrightzeros = ntz<inttype, width>(leftd); if (rightzeros == 0) { DEBUGPRINT("case 0"); if (leftdrightzeros < small_shift_maxbits) { DEBUGPRINT("case 0a"); planMul<inttype, width>(m, leftd >> leftdrightzeros, depth+1); planMul<inttype, width>(m, rightd, depth+1); m.append(MulOp::shladd, leftdrightzeros); } else { planMul<inttype, width>(m, leftd, depth+1); planMul<inttype, width>(m, rightd, depth+1); m.append(MulOp::add); } } else if ((0 < rightzeros) && (rightzeros <= small_shift_maxbits)) { DEBUGPRINT("case 1"); planMul<inttype, width>(m, rightd >> rightzeros, depth+1); planMul<inttype, width>(m, leftd, depth+1); m.append(MulOp::shladd, rightzeros); } else { DEBUGPRINT("case 3"); planMul<inttype, width>(m, leftd >> small_shift_maxbits, depth+1); planMul<inttype, width>(m, rightd, depth+1); m.append(MulOp::shladd, small_shift_maxbits); } DEBUGPRINT("done"); } } } template <typename inttype, int width> void planMulLookup(MulMethod &m, inttype d, int depth) { assert((d>=0) && (d < 32)); DEBUGPRINT("planMulLookup(" << (int) d << ")"); switch (d) { case 0: m.append(MulOp::pushc, 0); break; case 1: m.append(MulOp::pushy); break; case 2: m.append(MulOp::pushy); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break; case 3: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); break; case 4: m.append(MulOp::pushy); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 2); break; case 5: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2); break; case 6: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break; case 7: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break; case 8: m.append(MulOp::pushy); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 3); break; case 9: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 3); break; case 10: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break; case 11: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break; case 12: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 2); break; case 13: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::pushy); m.append(MulOp::shladd, 2); break; case 14: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break; case 15: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::dup); m.append(MulOp::shladd, 2); break; case 16: m.append(MulOp::pushy); if (m.SMALL_SHIFT_MAXBITS == 4) { m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 4); } else if (m.SMALL_SHIFT_MAXBITS == 3) { m.append(MulOp::shiftl, 4); } else { assert(0); } break; case 17: m.append(MulOp::pushy); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 3); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break; case 18: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 3); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break; case 19: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 3); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break; case 20: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 2); break; case 21: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2); m.append(MulOp::pushy); m.append(MulOp::shladd, 2); break; case 22: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break; case 23: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break; case 24: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 3); break; case 25: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::pushy); m.append(MulOp::shladd, 3); break; case 26: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::pushy); m.append(MulOp::shladd, 2); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break; case 27: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::dup); m.append(MulOp::shladd, 3); break; case 28: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 2); break; case 29: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); m.append(MulOp::pushy); m.append(MulOp::shladd, 2); break; case 30: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::dup); m.append(MulOp::shladd, 2); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break; case 31: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); m.append(MulOp::dup); m.append(MulOp::shladd, 2); m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break; default: assert(0); } } #ifdef STANDALONE_TEST #include "testharness2.h" #else // !STANDALONE_TEST Opnd * Simplifier::planMul32(I_32 multiplier, Opnd *opnd) { const OptimizerFlags& optimizerFlags = irManager.getOptimizerFlags(); MulMethod method(!optimizerFlags.ia32_code_gen); planMul<I_32, 32>(method, multiplier, 1); if (Log::isEnabled()) { Log::out() << "in multiply(" << (int) multiplier << ", "; opnd->print(Log::out()); Log::out() << "), method is "; method.print(Log::out()); Log::out() << ::std::endl; } return method.genCode(opnd->getType(), this, opnd); } Opnd * Simplifier::planMul64(int64 multiplier, Opnd *opnd) { const OptimizerFlags& optimizerFlags = irManager.getOptimizerFlags(); MulMethod method(!optimizerFlags.ia32_code_gen); planMul<int64, 64>(method, multiplier, 1); if (Log::isEnabled()) { Log::out() << "in multiply(" << (int) multiplier << ", "; opnd->print(Log::out()); Log::out() << "), method is "; method.print(Log::out()); Log::out() << ::std::endl; } return method.genCode(opnd->getType(), this, opnd); } #endif } //namespace Jitrino