in commons-statistics-inference/src/main/java/org/apache/commons/statistics/inference/KolmogorovSmirnovTest.java [785:874]
/**
 * Computes the two-sample Kolmogorov-Smirnov statistic as an integral value.
 *
 * <p>The empirical CDFs of {@code x} and {@code y} step by {@code 1/n} and {@code 1/m}
 * respectively. The difference {@code cdf_x - cdf_y} is tracked scaled by {@code n * m}
 * so that each {@code x} value contributes {@code +m} and each {@code y} value
 * contributes {@code -n}; all arithmetic is therefore exact in {@code long}.
 *
 * <p>Both arrays are passed to {@code sort} and then traversed in a single merge pass
 * that relies on ascending order and on the absence of NaN.
 * NOTE(review): this assumes {@code sort} orders the arrays in place and rejects
 * NaN and empty input (the loop reads {@code x[0]} and {@code y[0]} unconditionally);
 * confirm against its definition.
 *
 * <p>Values that are tied between the two samples are consumed as a single step. The
 * most extreme excursions that could occur inside a tied region (all {@code x} ties
 * first for D+, all {@code y} ties first for D-) are recorded separately and reported
 * through {@code tiesD}.
 *
 * @param x First sample (sorted by this method).
 * @param y Second sample (sorted by this method).
 * @param sign Output: element 0 is set to {@code 1} for the greater-than alternative,
 * {@code -1} for the less-than alternative, otherwise to the result of comparing
 * D+ with D- (positive when D+ is larger, negative when D- is larger, zero when equal).
 * @param tiesD Output (length at least 2): element 0 is non-zero if any value is tied
 * between the samples; element 1 is the scaled extreme tie-path statistic for the
 * selected alternative. Element 1 is not clamped to be at least the returned value.
 * @return the statistic for the {@code alternative} in scope, scaled by {@code n * m}
 */
private long computeIntegralKolmogorovSmirnovStatistic(double[] x, double[] y, int[] sign, long[] tiesD) {
    // Sort the sample arrays
    sort(x, SAMPLE_1_NAME);
    sort(y, SAMPLE_2_NAME);
    final int n = x.length;
    final int m = y.length;
    // CDFs range from 0 to 1 using increments of 1/n and 1/m for x and y respectively.
    // Scale by n*m to use increments of m and n for x and y.
    // Find the max difference between cdf_x and cdf_y.
    int i = 0;
    int j = 0;
    // Running scaled difference (cdf_x - cdf_y) * n * m
    long d = 0;
    // Largest positive (D+) and most negative (D-) excursion of d; both include 0
    long plus = 0;
    long minus = 0;
    // Ties: store the D+,D- for most extreme path through tie region(s)
    long tplus = 0;
    long tminus = 0;
    do {
        // No NaNs so compare using < and >
        if (x[i] < y[j]) {
            // Consume the run of equal x values; only the end of the run is a CDF step
            final double z = x[i];
            do {
                i++;
                d += m;
            } while (i < n && x[i] == z);
            plus = d > plus ? d : plus;
        } else if (x[i] > y[j]) {
            // Consume the run of equal y values
            final double z = y[j];
            do {
                j++;
                d -= n;
            } while (j < m && y[j] == z);
            minus = d < minus ? d : minus;
        } else {
            // Traverse to the end of the tied section for d.
            // Also compute the most extreme path through the tied region.
            final double z = x[i];
            // Value of d on entry to the tied region
            final long dd = d;
            int k = i;
            do {
                i++;
            } while (i < n && x[i] == z);
            // Number of tied x values; widen before multiplying to avoid int overflow
            k = i - k;
            d += k * (long) m;
            // Extreme D+ path: all tied x values are taken before any tied y value
            tplus = d > tplus ? d : tplus;
            k = j;
            do {
                j++;
            } while (j < m && y[j] == z);
            // Number of tied y values
            k = j - k;
            d -= k * (long) n;
            // Extreme D- path must start at the original d
            tminus = Math.min(tminus, dd - k * (long) n);
            // End of tied section: d has a single value here so it can
            // update at most one of the two extremes
            if (d > plus) {
                plus = d;
            } else if (d < minus) {
                minus = d;
            }
        }
    } while (i < n && j < m);
    // Any remaining values in one sample only move d back towards zero, so the
    // extremes cannot change after the first sample is exhausted.

    // The presence of any ties is flagged by a non-zero value for D+ or D-.
    // Note we cannot use the selected tiesD value as in the one-sided case it may be zero
    // and the non-selected D value will be non-zero.
    // (tplus >= 0 and tminus <= 0 so the bitwise OR is zero only when both are zero.)
    tiesD[0] = tplus | tminus;
    // For simplicity the correct tiesD is not returned (correct value is commented).
    // The only case that matters is tiesD > D which is evaluated by the caller.
    // Note however that the distance of tiesD < D is a measure of how little the
    // tied region matters.
    if (alternative == AlternativeHypothesis.GREATER_THAN) {
        sign[0] = 1;
        // correct = max(tplus, plus)
        tiesD[1] = tplus;
        return plus;
    } else if (alternative == AlternativeHypothesis.LESS_THAN) {
        sign[0] = -1;
        // correct = -min(tminus, minus)
        tiesD[1] = -tminus;
        return -minus;
    } else {
        // Two sided.
        // Compare as long: widening to double is lossy above 2^53 and could
        // report equality for distinct D+ and D- when n*m is very large.
        sign[0] = Long.compare(plus, -minus);
        d = Math.max(plus, -minus);
        // correct = max(d, max(tplus, -tminus))
        tiesD[1] = Math.max(tplus, -tminus);
        return d;
    }
}