in commons-numbers-core/src/main/java/org/apache/commons/numbers/core/DDMath.java [130:301]
private static DD computePowScaled(long b, double x, double xx, int n, long[] exp) {
// Same as DD.computePowScaled using a triple-double intermediate.
// triple-double multiplication:
// (a0, a1, a2) * (b0, b1, b2)
// a x b ~ a0b0 O(1) term
// + a0b1 + a1b0 O(eps) terms
// + a0b2 + a1b1 + a2b0 O(eps^2) terms
// + a1b2 + a2b1 O(eps^3) terms
// + a2b2 O(eps^4) term (not required for the first 159 bits)
// Higher terms require two-prod if the round-off is <= O(eps^3).
// (pij,qij) = two-prod(ai, bj); pij = O(eps^i+j); qij = O(eps^i+j+1)
// p00 O(1)
// p01, p10, q00 O(eps)
// p02, p11, p20, q01, q10 O(eps^2)
// p12, p21, q02, q11, q20 O(eps^3)
// Sum terms of the same order. Carry round-off to lower order:
// s0 = p00 Order(1)
// Sum (p01, p10, q00) -> (s1, r2, r3a) Order(eps)
// Sum (p02, p11, p20, q01, q10, r2) -> (s2, r3b) Order(eps^2)
// Sum (p12, p21, q02, q11, q20, r3a, r3b) -> s3 Order(eps^3)
//
// Simplifies for (b0, b1):
// Sum (p01, p10, q00) -> (s1, r2, r3a) Order(eps)
// Sum (p11, p20, q01, q10, r2) -> (s2, r3b) Order(eps^2)
// Sum (p21, q11, q20, r3a, r3b) -> s3 Order(eps^3)
//
// Simplifies for the square:
// Sum (2 * p01, q00) -> (s1, r2) Order(eps)
// Sum (2 * p02, 2 * q01, p11, r2) -> (s2, r3b) Order(eps^2)
// Sum (2 * p12, 2 * q02, q11, r3b) -> s3 Order(eps^3)
// Scale the input in [0.5, 1) to be above 1. Represented as 2^be * b.
final long be = b - 1;
final double b0 = x * 2;
final double b1 = xx * 2;
// Split b
final double b0h = DD.highPart(b0);
final double b0l = b0 - b0h;
final double b1h = DD.highPart(b1);
final double b1l = b1 - b1h;
// Initialise the result as x^1. Represented as 2^fe * f.
long fe = be;
double f0 = b0;
double f1 = b1;
double f2 = 0;
// Shift the highest set bit off the top.
// Any remaining bits are detected in the sign bit.
final int an = Math.abs(n);
final int shift = Integer.numberOfLeadingZeros(an) + 1;
int bits = an << shift;
DD t;
final MDD m = new MDD();
// Multiplication is done inline with some triple precision helper routines.
// Process remaining bits below highest set bit.
for (int i = 32 - shift; i != 0; i--, bits <<= 1) {
// Square the result
fe <<= 1;
double a0h = DD.highPart(f0);
double a0l = f0 - a0h;
double a1h = DD.highPart(f1);
double a1l = f1 - a1h;
double a2h = DD.highPart(f2);
double a2l = f2 - a2h;
double p00 = f0 * f0;
double q00 = DD.twoSquareLow(a0h, a0l, p00);
double p01 = f0 * f1;
double q01 = DD.twoProductLow(a0h, a0l, a1h, a1l, p01);
final double p02 = f0 * f2;
final double q02 = DD.twoProductLow(a0h, a0l, a2h, a2l, p02);
double p11 = f1 * f1;
double q11 = DD.twoSquareLow(a1h, a1l, p11);
final double p12 = f1 * f2;
double s0 = p00;
// Sum (2 * p01, q00) -> (s1, r2) Order(eps)
double s1 = 2 * p01 + q00;
double r2 = DD.twoSumLow(2 * p01, q00, s1);
// Sum (2 * p02, 2 * q01, p11, r2) -> (s2, r3b) Order(eps^2)
double s2 = p02 + q01;
double r3b = DD.twoSumLow(p02, q01, s2);
double u = p11 + r2;
double v = DD.twoSumLow(p11, r2, u);
t = DD.add(2 * s2, 2 * r3b, u, v);
s2 = t.hi();
r3b = t.lo();
// Sum (2 * p12, 2 * q02, q11, r3b) -> s3 Order(eps^3)
double s3 = 2 * (p12 + q02) + q11 + r3b;
f0 = norm3(s0, s1, s2, s3, m);
f1 = m.x;
f2 = m.xx;
// Rescale
if (Math.abs(f0) > SAFE_MULTIPLY) {
// Scale back to the [1, 2) range. As safe multiply is 2^500
// the exponent should be < 1001 so the twoPow scaling factor is supported.
final int e = Math.getExponent(f0);
final double s = DD.twoPow(-e);
fe += e;
f0 *= s;
f1 *= s;
f2 *= s;
}
if (bits < 0) {
// Multiply by b
fe += be;
a0h = DD.highPart(f0);
a0l = f0 - a0h;
a1h = DD.highPart(f1);
a1l = f1 - a1h;
a2h = DD.highPart(f2);
a2l = f2 - a2h;
p00 = f0 * b0;
q00 = DD.twoProductLow(a0h, a0l, b0h, b0l, p00);
p01 = f0 * b1;
q01 = DD.twoProductLow(a0h, a0l, b1h, b1l, p01);
final double p10 = f1 * b0;
final double q10 = DD.twoProductLow(a1h, a1l, b0h, b0l, p10);
p11 = f1 * b1;
q11 = DD.twoProductLow(a1h, a1l, b1h, b1l, p11);
final double p20 = f2 * b0;
final double q20 = DD.twoProductLow(a2h, a2l, b0h, b0l, p20);
final double p21 = f2 * b1;
s0 = p00;
// Sum (p01, p10, q00) -> (s1, r2, r3a) Order(eps)
u = p01 + p10;
v = DD.twoSumLow(p01, p10, u);
s1 = q00 + u;
final double w = DD.twoSumLow(q00, u, s1);
r2 = v + w;
final double r3a = DD.twoSumLow(v, w, r2);
// Sum (p11, p20, q01, q10, r2) -> (s2, r3b) Order(eps^2)
s2 = p11 + p20;
r3b = DD.twoSumLow(p11, p20, s2);
u = q01 + q10;
v = DD.twoSumLow(q01, q10, u);
t = DD.add(s2, r3b, u, v);
s2 = t.hi() + r2;
r3b = DD.twoSumLow(t.hi(), r2, s2);
// Sum (p21, q11, q20, r3a, r3b) -> s3 Order(eps^3)
s3 = p21 + q11 + q20 + r3a + r3b;
f0 = norm3(s0, s1, s2, s3, m);
f1 = m.x;
f2 = m.xx;
// Avoid rescale as x2 is in [1, 2)
}
}
// Ensure (f0, f1) are 1 ulp exact
final double u = f1 + f2;
t = DD.fastTwoSum(f0, u);
final int[] e = {0};
// If the power is negative, invert in triple precision
if (n < 0) {
// Require the round-off
final double v = DD.fastTwoSumLow(f1, f2, u);
// Result is in approximately [1, 2^501] so inversion is safe.
t = inverse3(t.hi(), t.lo(), v);
// Rescale to [0.5, 1.0]
t = t.frexp(e);
exp[0] = e[0] - fe;
return t;
}
t = t.frexp(e);
exp[0] = fe + e[0];
return t;
}