in commons-statistics-inference/src/main/java/org/apache/commons/statistics/inference/KolmogorovSmirnovDistribution.java [826:978]
static double sf(double x, int n, ScaledPower power) {
    // Computes the complementary probability P(D_n^+ >= x) for the one-sided
    // Kolmogorov-Smirnov statistic using the exact finite sums, with the summation
    // carried out in extended (double-double) precision and scaled to avoid
    // over/underflow. Compute only the SF using Algorithm 1 pp 12.
    // Compute: k = floor(n*x), alpha = nx - k; x = (k+alpha)/n with 0 <= alpha < 1
    final double[] alpha = {0};
    final int k = splitX(n, x, alpha);
    // Choose the algorithm:
    // Eq (13) Smirnov/Birnbaum-Tingey; or Smirnov/Dwass Eq (31)
    // Eq. 13 sums j = 0 : floor( n(1-x) ) = n - 1 - floor(nx) iff alpha != 0; else n - floor(nx)
    // Eq. 31 sums j = ceil( n(1-x) ) : n = n - floor(nx)
    // Drop a term if x = (n-j)/n. Equates to shifting the floor* down and ceil* up:
    // Eq. 13 N = floor*( n(1-x) ) = n - k - ((alpha!=0) ? 1 : 0) - ((alpha==0) ? 1 : 0)
    // Eq. 31 N = n - ceil*( n(1-x) ) = k - ((alpha==0) ? 1 : 0)
    // Where N is the number of terms - 1. This differs from Algorithm 1 by dropping
    // a SD term when it should be zero (to working precision).
    final int regN = n - k - 1;
    final int sdN = k - ((alpha[0] == 0) ? 1 : 0);
    // SD : Figure 3 (c) (pp. 6)
    // Terms Aj (j = n -> 0) have alternating signs through the range and may involve
    // numbers much bigger than 1 causing cancellation; magnitudes increase then decrease.
    // Section 3.3: Extra digits of precision required
    // grows like Order(sqrt(n)). E.g. sf=0.7 (x ~ 0.4/sqrt(n)) loses 8 digits.
    //
    // Regular : Figure 3 (a, b)
    // Terms Aj can have similar magnitude through the range; when x >= 1/sqrt(n)
    // the final few terms can be magnitudes smaller and could be ignored.
    // Section 3.4: As x increases the magnitude of terms becomes more peaked,
    // centred at j = (n-nx)/2, i.e. 50% of the terms.
    //
    // As n -> inf the sf for x = k/n agrees with the asymptote Eq 5 in log2(n) bits.
    //
    // Figure 4 has lines at x = 1/n and x = 3/sqrt(n).
    // Point between is approximately x = 4/n, i.e. nx < 4 : k <= 3.
    // If faster when x < 0.5 and requiring nx ~ 4 then requires n >= 8.
    //
    // Note: If SD accuracy scales with sqrt(n) then we could use 1 / sqrt(n).
    // That threshold is always above 4 / n when n is 16 (4/n = 1/sqrt(n) : n = 4^2).
    // So the current thresholds are conservative.
    // sd == true selects the Smirnov/Dwass summation (fewer terms but cancellation);
    // false selects the regular Smirnov/Birnbaum-Tingey summation.
    boolean sd = false;
    if (sdN < regN) {
        // Here x < 0.5 and SD has fewer terms
        // Always choose when we only have one additional term (i.e x < 2/n)
        sd = sdN <= 1;
        // Otherwise when x < 4 / n
        sd |= sdN <= SD_MAX_TERMS && n >= SD_MIN_N;
    }
    final int maxN = sd ? sdN : regN;
    // Note: if N > "very large" use the asymptotic approximation.
    // Currently this check is done on n (sample size) in the calling function.
    // This provides a monotonic p-value for all x with the same n.
    // Configure the algorithm.
    // The error of double-double addition and multiplication is low (< 2^-102).
    // The error in Aj is mainly from the power function.
    // fastPow error is around 2^-52, pow error is ~ 2^-70 or lower.
    // Smirnov/Dwass has a sum of terms that cancel and requires higher precision.
    // The power can optionally be specified (POWER_DEFAULT acts as a sentinel
    // meaning "choose automatically").
    final ScaledPower fpow;
    if (power == POWER_DEFAULT) {
        // SD has only a few terms. Use a high accuracy power.
        fpow = sd ? DDMath::pow : DD::pow;
    } else {
        fpow = power;
    }
    // For the regular summation we must sum at least 50% of the terms. The number
    // of required bits to sum remaining terms of the same magnitude is log2(N/2).
    // These guard bits are conservative and > ~99% of terms are typically used.
    // A term Aj is skipped (and the loop terminated) once its exponent falls more
    // than sumBits below the exponent of the running sum.
    final int sumBits = sd ? SD_SUM_PRECISION_BITS : SUM_PRECISION_BITS + log2(maxN >> 1);
    // Working variable for the exponent of scaled values
    // (int[] / long[] are used as mutable out-parameters for frexp/pow).
    final int[] ie = {0};
    final long[] le = {0};
    // The terms Aj may over/underflow.
    // This is handled by maintaining the sum(Aj) using a fractional representation.
    // sum(Aj) maintained as 2^esum * sum with the fraction in [0.5, 1)
    DD sum;
    long esum;
    // Compute A0 (the first term of the chosen summation)
    if (sd) {
        // A0 = (1+x)^(n-1)
        sum = fpow.pow(DD.ofSum(1, x), n - 1, le);
        esum = le[0];
    } else {
        // A0 = (1-x)^n / x
        sum = fpow.pow(DD.ofDifference(1, x), n, le);
        esum = le[0];
        // x in (1/n, 1 - 1/n) so the divide of the fraction is safe
        sum = sum.divide(x).frexp(ie);
        esum += ie[0];
    }
    // Binomial coefficient c(n, j) maintained as 2^ec * c with c in [1, 2)
    // This value is integral but maintained to limited precision
    DD c = DD.ONE;
    long ec = 0;
    for (int i = 1; i <= maxN; i++) {
        // Incremental update: c(n, j) = c(n, j-1) * (n-j+1) / j
        c = c.multiply(DD.fromQuotient(n - i + 1, i));
        // Here we maintain c in [1, 2) to restrict the scaled Aj term to [0.25, 2].
        final int b = Math.getExponent(c.hi());
        if (b != 0) {
            c = c.scalb(-b);
            ec += b;
        }
        // Compute Aj. SD sums from the top (j = n - i); regular sums from the
        // bottom (j = i). Either way c(n, j) matches since C(n, i) == C(n, n-i).
        final int j = sd ? n - i : i;
        // Algorithm 4 pp. 27
        // S = ((j/n) + x)^(j-1)
        // T = ((n-j)/n - x)^(n-j)
        final DD s = fpow.pow(DD.fromQuotient(j, n).add(x), j - 1, le);
        final long es = le[0];
        final DD t = fpow.pow(DD.fromQuotient(n - j, n).subtract(x), n - j, le);
        final long et = le[0];
        // Aj = C(n, j) * T * S
        //    = 2^e * [1, 2] * [0.5, 1] * [0.5, 1]
        //    = 2^e * [0.25, 2]
        final long eaj = ec + es + et;
        // Only compute and add to the sum when the exponents overlap by n-bits.
        if (eaj > esum - sumBits) {
            DD aj = c.multiply(t).multiply(s);
            // Scaling must offset by the scale of the sum
            aj = aj.scalb((int) (eaj - esum));
            sum = sum.add(aj);
        } else {
            // Terms are expected to increase in magnitude then reduce.
            // Here the terms are insignificant and we can stop.
            // Effectively Aj -> eps * sum, and most of the computation is done.
            break;
        }
        // Re-scale the sum so the fraction stays normalised in [0.5, 1)
        sum = sum.frexp(ie);
        esum += ie[0];
    }
    // p = x * sum(Ai). Since the sum is normalised
    // this is safe as long as x does not approach a sub-normal.
    // Typically x in (1/n, 1 - 1/n).
    sum = sum.multiply(x);
    // Rescale the result back from the 2^esum fractional representation
    sum = sum.scalb((int) esum);
    if (sd) {
        // The SD summation computed the CDF: SF = 1 - CDF
        sum = sum.negate().add(1);
    }
    // Guard against rounding outside [0, 1] before returning the probability.
    return clipProbability(sum.doubleValue());
}