in commons-statistics-inference/src/main/java/org/apache/commons/statistics/inference/KolmogorovSmirnovDistribution.java [826:978]
static double sf(double x, int n, ScaledPower power) {
    // Computes the complementary probability P(D_n^+ >= x) for the one-sided
    // Kolmogorov-Smirnov statistic using the exact finite sums, with the summation
    // carried out in extended (double-double) precision and scaled to avoid
    // over/underflow. Compute only the SF using Algorithm 1 pp 12.
    // Compute: k = floor(n*x), alpha = nx - k; x = (k+alpha)/n with 0 <= alpha < 1
    final double[] alpha = {0};
    final int k = splitX(n, x, alpha);
    // Choose the algorithm:
    // Eq (13) Smirnov/Birnbaum-Tingey; or Smirnov/Dwass Eq (31)
    // Eq. 13 sums j = 0 : floor( n(1-x) ) = n - 1 - floor(nx) iff alpha != 0; else n - floor(nx)
    // Eq. 31 sums j = ceil( n(1-x) ) : n = n - floor(nx)
    // Drop a term if x = (n-j)/n. Equates to shifting the floor* down and ceil* up:
    // Eq. 13 N = floor*( n(1-x) ) = n - k - ((alpha!=0) ? 1 : 0) - ((alpha==0) ? 1 : 0)
    // Eq. 31 N = n - ceil*( n(1-x) ) = k - ((alpha==0) ? 1 : 0)
    // Where N is the number of terms - 1. This differs from Algorithm 1 by dropping
    // a SD term when it should be zero (to working precision).
    final int regN = n - k - 1;
    final int sdN = k - ((alpha[0] == 0) ? 1 : 0);
    // SD : Figure 3 (c) (pp. 6)
    // Terms Aj (j = n -> 0) have alternating signs through the range and may involve
    // numbers much bigger than 1 causing cancellation; magnitudes increase then decrease.
    // Section 3.3: Extra digits of precision required
    // grows like Order(sqrt(n)). E.g. sf=0.7 (x ~ 0.4/sqrt(n)) loses 8 digits.
    //
    // Regular : Figure 3 (a, b)
    // Terms Aj can have similar magnitude through the range; when x >= 1/sqrt(n)
    // the final few terms can be magnitudes smaller and could be ignored.
    // Section 3.4: As x increases the magnitude of terms becomes more peaked,
    // centred at j = (n-nx)/2, i.e. 50% of the terms.
    //
    // As n -> inf the sf for x = k/n agrees with the asymptote Eq 5 in log2(n) bits.
    //
    // Figure 4 has lines at x = 1/n and x = 3/sqrt(n).
    // Point between is approximately x = 4/n, i.e. nx < 4 : k <= 3.
    // If faster when x < 0.5 and requiring nx ~ 4 then requires n >= 8.
    //
    // Note: If SD accuracy scales with sqrt(n) then we could use 1 / sqrt(n).
    // That threshold is always above 4 / n when n is 16 (4/n = 1/sqrt(n) : n = 4^2).
    // So the current thresholds are conservative.
    // sd == true selects the Smirnov/Dwass summation (fewer terms but cancellation);
    // false selects the regular Smirnov/Birnbaum-Tingey summation.
    boolean sd = false;
    if (sdN < regN) {
        // Here x < 0.5 and SD has fewer terms
        // Always choose when we only have one additional term (i.e x < 2/n)
        sd = sdN <= 1;
        // Otherwise when x < 4 / n
        sd |= sdN <= SD_MAX_TERMS && n >= SD_MIN_N;
    }
    final int maxN = sd ? sdN : regN;
    // Note: if N > "very large" use the asymptotic approximation.
    // Currently this check is done on n (sample size) in the calling function.
    // This provides a monotonic p-value for all x with the same n.
    // Configure the algorithm.
    // The error of double-double addition and multiplication is low (< 2^-102).
    // The error in Aj is mainly from the power function.
    // fastPow error is around 2^-52, pow error is ~ 2^-70 or lower.
    // Smirnov/Dwass has a sum of terms that cancel and requires higher precision.
    // The power can optionally be specified (POWER_DEFAULT acts as a sentinel
    // meaning "choose automatically").
    final ScaledPower fpow;
    if (power == POWER_DEFAULT) {
        // SD has only a few terms. Use a high accuracy power.
        fpow = sd ? DDMath::pow : DD::pow;
    } else {
        fpow = power;
    }
    // For the regular summation we must sum at least 50% of the terms. The number
    // of required bits to sum remaining terms of the same magnitude is log2(N/2).
    // These guard bits are conservative and > ~99% of terms are typically used.
    // A term Aj is skipped (and the loop terminated) once its exponent falls more
    // than sumBits below the exponent of the running sum.
    final int sumBits = sd ? SD_SUM_PRECISION_BITS : SUM_PRECISION_BITS + log2(maxN >> 1);
    // Working variable for the exponent of scaled values
    // (int[] / long[] are used as mutable out-parameters for frexp/pow).
    final int[] ie = {0};
    final long[] le = {0};
    // The terms Aj may over/underflow.
    // This is handled by maintaining the sum(Aj) using a fractional representation.
    // sum(Aj) maintained as 2^esum * sum with the fraction in [0.5, 1)
    DD sum;
    long esum;
    // Compute A0 (the first term of the chosen summation)
    if (sd) {
        // A0 = (1+x)^(n-1)
        sum = fpow.pow(DD.ofSum(1, x), n - 1, le);
        esum = le[0];
    } else {
        // A0 = (1-x)^n / x
        sum = fpow.pow(DD.ofDifference(1, x), n, le);
        esum = le[0];
        // x in (1/n, 1 - 1/n) so the divide of the fraction is safe
        sum = sum.divide(x).frexp(ie);
        esum += ie[0];
    }
    // Binomial coefficient c(n, j) maintained as 2^ec * c with c in [1, 2)
    // This value is integral but maintained to limited precision
    DD c = DD.ONE;
    long ec = 0;
    for (int i = 1; i <= maxN; i++) {
        // Incremental update: c(n, j) = c(n, j-1) * (n-j+1) / j
        c = c.multiply(DD.fromQuotient(n - i + 1, i));
        // Here we maintain c in [1, 2) to restrict the scaled Aj term to [0.25, 2].
        final int b = Math.getExponent(c.hi());
        if (b != 0) {
            c = c.scalb(-b);
            ec += b;
        }
        // Compute Aj. SD sums from the top (j = n - i); regular sums from the
        // bottom (j = i). Either way c(n, j) matches since C(n, i) == C(n, n-i).
        final int j = sd ? n - i : i;
        // Algorithm 4 pp. 27
        // S = ((j/n) + x)^(j-1)
        // T = ((n-j)/n - x)^(n-j)
        final DD s = fpow.pow(DD.fromQuotient(j, n).add(x), j - 1, le);
        final long es = le[0];
        final DD t = fpow.pow(DD.fromQuotient(n - j, n).subtract(x), n - j, le);
        final long et = le[0];
        // Aj = C(n, j) * T * S
        //    = 2^e * [1, 2] * [0.5, 1] * [0.5, 1]
        //    = 2^e * [0.25, 2]
        final long eaj = ec + es + et;
        // Only compute and add to the sum when the exponents overlap by n-bits.
        if (eaj > esum - sumBits) {
            DD aj = c.multiply(t).multiply(s);
            // Scaling must offset by the scale of the sum
            aj = aj.scalb((int) (eaj - esum));
            sum = sum.add(aj);
        } else {
            // Terms are expected to increase in magnitude then reduce.
            // Here the terms are insignificant and we can stop.
            // Effectively Aj -> eps * sum, and most of the computation is done.
            break;
        }
        // Re-scale the sum so the fraction stays normalised in [0.5, 1)
        sum = sum.frexp(ie);
        esum += ie[0];
    }
    // p = x * sum(Ai). Since the sum is normalised
    // this is safe as long as x does not approach a sub-normal.
    // Typically x in (1/n, 1 - 1/n).
    sum = sum.multiply(x);
    // Rescale the result back from the 2^esum fractional representation
    sum = sum.scalb((int) esum);
    if (sd) {
        // The SD summation computed the CDF: SF = 1 - CDF
        sum = sum.negate().add(1);
    }
    // Guard against rounding outside [0, 1] before returning the probability.
    return clipProbability(sum.doubleValue());
}