private static Result aov()

in commons-statistics-inference/src/main/java/org/apache/commons/statistics/inference/OneWayAnova.java [258:338]


    private static Result aov(Collection<double[]> data, double[] statistic) {
        Arguments.checkCategoriesRequiredSize(data.size(), 2);
        long n = 0;
        for (final double[] array : data) {
            n += array.length;
            Arguments.checkValuesRequiredSize(array.length, 1);
        }
        final long dfwg = n - data.size();
        if (dfwg == 0) {
            throw new InferenceException(InferenceException.ZERO, "Degrees of freedom within groups");
        }

        // wg = within group
        // bg = between group

        // F = Var(bg) / Var(wg)
        // Var = SS / df
        // SStotal = sum((x - u)^2) = sum(x^2) - sum(x)^2/n
        //         = SSwg + SSbg
        // Some cancellation of terms reduces the computation to 3 sums:
        // SSwg = [ sum(x^2) - sum(x)^2/n ] - [ sum_g { sum(sum(x^2) - sum(x)^2/n) } ]
        // SSbg = SStotal - SSwg
        //      = sum_g { sum(x)^2/n) } - sum(x)^2/n
        // SSwg = SStotal - SSbg
        //      = sum(x^2) - sum_g { sum(x)^2/n) }

        // Stabilize the computation by shifting all to a common mean of zero.
        // This minimise the magnitude of x^2 terms.
        // The terms sum(x)^2/n -> 0. Included them to capture the round-off.
        final double m = StatisticUtils.mean(data);
        final Sum sxx = Sum.create();
        final Sum sx = Sum.create();
        final Sum sg = Sum.create();
        // Track if each group had the same value
        boolean eachSame = true;
        for (final double[] array : data) {
            eachSame = eachSame && allMatch(array[0], array);
            final Sum s = Sum.create();
            for (final double v : array) {
                final double x = v - m;
                s.add(x);
                // sum(x)
                sx.add(x);
                // sum(x^2)
                sxx.add(x * x);
            }
            // Create the negative sum so we can subtract it via 'add'
            // -sum_g { sum(x)^2/n) }
            sg.add(-pow2(s.getAsDouble()) / array.length);
        }

        // Note: SS terms should not be negative given:
        // SS = sum((x - u)^2)
        // This can happen due to floating-point error in sum(x^2) - sum(x)^2/n
        final double sswg = Math.max(0, sxx.add(sg).getAsDouble());
        // Flip the sign back
        final double ssbg = Math.max(0, -sg.add(pow2(sx.getAsDouble()) / n).getAsDouble());
        final int dfbg = data.size() - 1;
        // Handle edge-cases:
        // Note: 0 / 0 -> NaN : x / 0 -> inf
        // These are documented results and should output p=NaN or 0.
        // This result will occur naturally.
        // However the SS totals may not be 0.0 so we correct these cases.
        final boolean allSame = eachSame && allMatch(data);
        final double msbg = allSame ? 0 : ssbg / dfbg;
        final double mswg = eachSame ? 0 : sswg / dfwg;
        final double f = msbg / mswg;
        if (statistic != null) {
            statistic[0] = f;
            return null;
        }
        final double p = FDistribution.of(dfbg, dfwg).survivalProbability(f);

        // Support partitioning the variance
        // ni = size of each of the groups
        // nO=(1/(a−1))*(sum(ni)−(sum(ni^2)/sum(ni))
        final double nO = (n - data.stream()
                .mapToDouble(x -> pow2(x.length)).sum() / n) / dfbg;

        return new Result(dfbg, dfwg, msbg, mswg, nO, f, p);
    }