public static SharedStateDiscreteSampler of()

in commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/AliasMethodDiscreteSampler.java [351:466]


    public static SharedStateDiscreteSampler of(final UniformRandomProvider rng,
                                                final double[] probabilities,
                                                int alpha) {
        // The Alias method balances N categories with counts around the mean into N sections,
        // each allocated 'mean' observations.
        //
        // Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a
        // 2D array as 4 sections with a height of the mean:
        //
        // 6
        // 6
        // 6
        // 63   => 6366   --
        // 632     6326    |-- mean
        // 6321    6321   --
        //
        // section abcd
        //
        // Each section is divided as:
        // a: 6=1/1
        // b: 3=1/1
        // c: 2=2/3; 6=1/3   (6 is the alias)
        // d: 1=1/3; 6=2/3   (6 is the alias)
        //
        // The sample is obtained by randomly selecting a section, then choosing which category
        // from the pair based on a uniform random deviate.

        final double sumProb = InternalUtils.validateProbabilities(probabilities);

        // Allow zero-padding
        final int n = computeSize(probabilities.length, alpha);

        // Partition into small and large by splitting on the average.
        final double mean = sumProb / n;
        // The cardinality of smallSize + largeSize = n.
        // So fill the same array from either end.
        final int[] indices = new int[n];
        int large = n;
        int small = 0;
        for (int i = 0; i < probabilities.length; i++) {
            if (probabilities[i] >= mean) {
                indices[--large] = i;
            } else {
                indices[small++] = i;
            }
        }

        small = fillRemainingIndices(probabilities.length, indices, small);

        // This may be smaller than the input length if the probabilities were already padded.
        final int nonZeroIndex = findLastNonZeroIndex(probabilities);

        // The probabilities are modified so use a copy.
        // Note: probabilities are required only up to last nonZeroIndex
        final double[] remainingProbabilities = Arrays.copyOf(probabilities, nonZeroIndex + 1);

        // Allocate the final tables.
        // Probability table may be truncated (when zero padded).
        // The alias table is full length.
        final long[] probability = new long[remainingProbabilities.length];
        final int[] alias = new int[n];

        // This loop uses each large in turn to fill the alias table for small probabilities that
        // do not reach the requirement to fill an entire section alone (i.e. p < mean).
        // Since the sum of the small should be less than the sum of the large it should use up
        // all the small first. However floating point round-off can result in
        // misclassification of items as small or large. The Vose algorithm handles this using
        // a while loop conditioned on the size of both sets and a subsequent loop to use
        // unpaired items.
        while (large != n && small != 0) {
            // Index of the small and the large probabilities.
            final int j = indices[--small];
            final int k = indices[large++];

            // Optimisation for zero-padded input:
            // p(j) = 0 above the last nonZeroIndex
            if (j > nonZeroIndex) {
                // The entire amount for the section is taken from the alias.
                remainingProbabilities[k] -= mean;
            } else {
                final double pj = remainingProbabilities[j];

                // Item j is a small probability that is below the mean.
                // Compute the weight of the section for item j: pj / mean.
                // This is scaled by 2^53 and the ceiling function used to round-up
                // the probability to a numerator of a fraction in the range [1,2^53].
                // Ceiling ensures non-zero values.
                probability[j] = (long) Math.ceil(CONVERT_TO_NUMERATOR * (pj / mean));

                // The remaining amount for the section is taken from the alias.
                // Effectively: probabilities[k] -= (mean - pj)
                remainingProbabilities[k] += pj - mean;
            }

            // If not j then the alias is k
            alias[j] = k;

            // Add the remaining probability from large to the appropriate list.
            if (remainingProbabilities[k] >= mean) {
                indices[--large] = k;
            } else {
                indices[small++] = k;
            }
        }

        // Final loop conditions to consume unpaired items.
        // Note: The large set should never be non-empty but this can occur due to round-off
        // error so consume from both.
        fillTable(probability, alias, indices, 0, small);
        fillTable(probability, alias, indices, large, n);

        // Change the algorithm for small power of 2 sized tables
        return isSmallPowerOf2(n) ?
            new SmallTableAliasMethodDiscreteSampler(rng, probability, alias) :
            new AliasMethodDiscreteSampler(rng, probability, alias);
    }