in lib/llvm/Support/APInt.cpp [1243:1405]
static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
unsigned m, unsigned n) {
assert(u && "Must provide dividend");
assert(v && "Must provide divisor");
assert(q && "Must provide quotient");
assert(u != v && u != q && v != q && "Must use different memory");
assert(n>1 && "n must be > 1");
// b denotes the base of the number system. In our case b is 2^32.
const uint64_t b = uint64_t(1) << 32;
// The DEBUG macros here tend to be spam in the debug output if you're not
// debugging this code. Disable them unless KNUTH_DEBUG is defined.
#pragma push_macro("LLVM_DEBUG")
#ifndef KNUTH_DEBUG
#undef LLVM_DEBUG
#define LLVM_DEBUG(X) \
do { \
} while (false)
#endif
LLVM_DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
LLVM_DEBUG(dbgs() << "KnuthDiv: original:");
LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
LLVM_DEBUG(dbgs() << " by");
LLVM_DEBUG(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
LLVM_DEBUG(dbgs() << '\n');
// D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
// u and v by d. Note that we have taken Knuth's advice here to use a power
// of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
// 2 allows us to shift instead of multiply and it is easy to determine the
// shift amount from the leading zeros. We are basically normalizing the u
// and v so that its high bits are shifted to the top of v's range without
// overflow. Note that this can require an extra word in u so that u must
// be of length m+n+1.
unsigned shift = countLeadingZeros(v[n-1]);
uint32_t v_carry = 0;
uint32_t u_carry = 0;
if (shift) {
for (unsigned i = 0; i < m+n; ++i) {
uint32_t u_tmp = u[i] >> (32 - shift);
u[i] = (u[i] << shift) | u_carry;
u_carry = u_tmp;
}
for (unsigned i = 0; i < n; ++i) {
uint32_t v_tmp = v[i] >> (32 - shift);
v[i] = (v[i] << shift) | v_carry;
v_carry = v_tmp;
}
}
u[m+n] = u_carry;
LLVM_DEBUG(dbgs() << "KnuthDiv: normal:");
LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
LLVM_DEBUG(dbgs() << " by");
LLVM_DEBUG(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
LLVM_DEBUG(dbgs() << '\n');
// D2. [Initialize j.] Set j to m. This is the loop counter over the places.
int j = m;
do {
LLVM_DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
// D3. [Calculate q'.].
// Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
// Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
// Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
// qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
// on v[n-2] determines at high speed most of the cases in which the trial
// value qp is one too large, and it eliminates all cases where qp is two
// too large.
uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
LLVM_DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
uint64_t qp = dividend / v[n-1];
uint64_t rp = dividend % v[n-1];
if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
qp--;
rp += v[n-1];
if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
qp--;
}
LLVM_DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
// D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
// (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
// consists of a simple multiplication by a one-place number, combined with
// a subtraction.
// The digits (u[j+n]...u[j]) should be kept positive; if the result of
// this step is actually negative, (u[j+n]...u[j]) should be left as the
// true value plus b**(n+1), namely as the b's complement of
// the true value, and a "borrow" to the left should be remembered.
int64_t borrow = 0;
for (unsigned i = 0; i < n; ++i) {
uint64_t p = uint64_t(qp) * uint64_t(v[i]);
int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
u[j+i] = Lo_32(subres);
borrow = Hi_32(p) - Hi_32(subres);
LLVM_DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
<< ", borrow = " << borrow << '\n');
}
bool isNeg = u[j+n] < borrow;
u[j+n] -= Lo_32(borrow);
LLVM_DEBUG(dbgs() << "KnuthDiv: after subtraction:");
LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
LLVM_DEBUG(dbgs() << '\n');
// D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
// negative, go to step D6; otherwise go on to step D7.
q[j] = Lo_32(qp);
if (isNeg) {
// D6. [Add back]. The probability that this step is necessary is very
// small, on the order of only 2/b. Make sure that test data accounts for
// this possibility. Decrease q[j] by 1
q[j]--;
// and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
// A carry will occur to the left of u[j+n], and it should be ignored
// since it cancels with the borrow that occurred in D4.
bool carry = false;
for (unsigned i = 0; i < n; i++) {
uint32_t limit = std::min(u[j+i],v[i]);
u[j+i] += v[i] + carry;
carry = u[j+i] < limit || (carry && u[j+i] == limit);
}
u[j+n] += carry;
}
LLVM_DEBUG(dbgs() << "KnuthDiv: after correction:");
LLVM_DEBUG(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
LLVM_DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
// D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
} while (--j >= 0);
LLVM_DEBUG(dbgs() << "KnuthDiv: quotient:");
LLVM_DEBUG(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
LLVM_DEBUG(dbgs() << '\n');
// D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
// remainder may be obtained by dividing u[...] by d. If r is non-null we
// compute the remainder (urem uses this).
if (r) {
// The value d is expressed by the "shift" value above since we avoided
// multiplication by d by using a shift left. So, all we have to do is
// shift right here.
if (shift) {
uint32_t carry = 0;
LLVM_DEBUG(dbgs() << "KnuthDiv: remainder:");
for (int i = n-1; i >= 0; i--) {
r[i] = (u[i] >> shift) | carry;
carry = u[i] << (32 - shift);
LLVM_DEBUG(dbgs() << " " << r[i]);
}
} else {
for (int i = n-1; i >= 0; i--) {
r[i] = u[i];
LLVM_DEBUG(dbgs() << " " << r[i]);
}
}
LLVM_DEBUG(dbgs() << '\n');
}
LLVM_DEBUG(dbgs() << '\n');
#pragma pop_macro("LLVM_DEBUG")
}