private static double twoSampleOneSidedPOutside()

in commons-statistics-inference/src/main/java/org/apache/commons/statistics/inference/KolmogorovSmirnovTest.java [1256:1319]


    private static double twoSampleOneSidedPOutside(long d, int n, int m, int gcd) {
        // Hodges, Fig.2
        // Lower boundary: (nx - my)/nm >= d : (nx - my) >= dnm
        // B(x, y) is the number of ways from (0, 0) to (x, y) without previously
        // reaching the boundary.
        // B(x, y) = binom(x+y, y) - [number of ways which previously reached the boundary]
        // Total paths:
        // sum_y { B(x, y) binom(m+n-x-y, n-y) }

        // Normalized by binom(m+n, n). Check this is possible.
        final long lm = m;
        if (n + lm > Integer.MAX_VALUE) {
            return -1;
        }
        final double binom = binom(m + n, n);
        if (binom == Double.POSITIVE_INFINITY) {
            return -1;
        }

        // This could be updated to use d in [1, lcm].
        // Currently it uses d in [gcd, n*m].
        final long dnm = d * gcd;

        // Visit all x in [0, m] where (nx - my) >= d for each increasing y in [0, n].
        // x = ceil( (d + my) / n ) = (d + my + n - 1) / n
        // y = ceil( (nx - d) / m ) = (nx - d + m - 1) / m
        // Note: n m integer, d in [0, nm], the intermediate cannot overflow a long.
        // x | y=0 = (d + n - 1) / n
        final int x0 = (int) ((dnm + n - 1) / n);
        if (x0 >= m) {
            return 1 / binom;
        }
        // The y above is the y *on* the boundary. Set the limit as the next y above:
        // y | x=m = 1 + floor( (nx - d) / m ) = 1 + (nm - d) / m
        final int maxy = (int) ((n * lm - dnm + m) / m);
        // Compute x and B(x, y) for visited B(x,y)
        final int[] xy = new int[maxy];
        final double[] bxy = new double[maxy];
        xy[0] = x0;
        bxy[0] = 1;
        for (int y = 1; y < maxy; y++) {
            final int x = (int) ((dnm + lm * y + n - 1) / n);
            // B(x, y) = binom(x+y, y) - [number of ways which previously reached the boundary]
            // Add the terms to subtract as a negative sum.
            final Sum b = Sum.create();
            for (int yy = 0; yy < y; yy++) {
                // Here: previousX = x - xy[yy] : previousY = y - yy
                // bxy[yy] is the paths to (previousX, previousY)
                // binom represent the paths from (previousX, previousY) to (x, y)
                b.addProduct(bxy[yy], -binom(x - xy[yy] + y - yy, y - yy));
            }
            b.add(binom(x + y, y));
            xy[y] = x;
            bxy[y] = b.getAsDouble();
        }
        // sum_y { B(x, y) binom(m+n-x-y, n-y) }
        final Sum sum = Sum.create();
        for (int y = 0; y < maxy; y++) {
            sum.addProduct(bxy[y], binom(m + n - xy[y] - y, n - y));
        }
        // No individual term should have overflowed since binom is finite.
        // Any sum above 1 is floating-point error.
        return KolmogorovSmirnovDistribution.clipProbability(sum.getAsDouble() / binom);
    }