in commons-statistics-inference/src/main/java/org/apache/commons/statistics/inference/KolmogorovSmirnovTest.java [1256:1319]
private static double twoSampleOneSidedPOutside(long d, int n, int m, int gcd) {
// Hodges, Fig.2
// Lower boundary: (nx - my)/nm >= d : (nx - my) >= dnm
// B(x, y) is the number of ways from (0, 0) to (x, y) without previously
// reaching the boundary.
// B(x, y) = binom(x+y, y) - [number of ways which previously reached the boundary]
// Total paths:
// sum_y { B(x, y) binom(m+n-x-y, n-y) }
// Normalized by binom(m+n, n). Check this is possible.
final long lm = m;
if (n + lm > Integer.MAX_VALUE) {
return -1;
}
final double binom = binom(m + n, n);
if (binom == Double.POSITIVE_INFINITY) {
return -1;
}
// This could be updated to use d in [1, lcm].
// Currently it uses d in [gcd, n*m].
final long dnm = d * gcd;
// Visit all x in [0, m] where (nx - my) >= d for each increasing y in [0, n].
// x = ceil( (d + my) / n ) = (d + my + n - 1) / n
// y = ceil( (nx - d) / m ) = (nx - d + m - 1) / m
// Note: n m integer, d in [0, nm], the intermediate cannot overflow a long.
// x | y=0 = (d + n - 1) / n
final int x0 = (int) ((dnm + n - 1) / n);
if (x0 >= m) {
return 1 / binom;
}
// The y above is the y *on* the boundary. Set the limit as the next y above:
// y | x=m = 1 + floor( (nx - d) / m ) = 1 + (nm - d) / m
final int maxy = (int) ((n * lm - dnm + m) / m);
// Compute x and B(x, y) for visited B(x,y)
final int[] xy = new int[maxy];
final double[] bxy = new double[maxy];
xy[0] = x0;
bxy[0] = 1;
for (int y = 1; y < maxy; y++) {
final int x = (int) ((dnm + lm * y + n - 1) / n);
// B(x, y) = binom(x+y, y) - [number of ways which previously reached the boundary]
// Add the terms to subtract as a negative sum.
final Sum b = Sum.create();
for (int yy = 0; yy < y; yy++) {
// Here: previousX = x - xy[yy] : previousY = y - yy
// bxy[yy] is the paths to (previousX, previousY)
// binom represent the paths from (previousX, previousY) to (x, y)
b.addProduct(bxy[yy], -binom(x - xy[yy] + y - yy, y - yy));
}
b.add(binom(x + y, y));
xy[y] = x;
bxy[y] = b.getAsDouble();
}
// sum_y { B(x, y) binom(m+n-x-y, n-y) }
final Sum sum = Sum.create();
for (int y = 0; y < maxy; y++) {
sum.addProduct(bxy[y], binom(m + n - xy[y] - y, n - y));
}
// No individual term should have overflowed since binom is finite.
// Any sum above 1 is floating-point error.
return KolmogorovSmirnovDistribution.clipProbability(sum.getAsDouble() / binom);
}