in commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/AliasMethodDiscreteSampler.java [351:466]
public static SharedStateDiscreteSampler of(final UniformRandomProvider rng,
final double[] probabilities,
int alpha) {
// The Alias method balances N categories with counts around the mean into N sections,
// each allocated 'mean' observations.
//
// Consider 4 categories with counts 6,3,2,1. The histogram can be balanced into a
// 2D array as 4 sections with a height of the mean:
//
// 6
// 6
// 6
// 63 => 6366 --
// 632 6326 |-- mean
// 6321 6321 --
//
// section abcd
//
// Each section is divided as:
// a: 6=1/1
// b: 3=1/1
// c: 2=2/3; 6=1/3 (6 is the alias)
// d: 1=1/3; 6=2/3 (6 is the alias)
//
// The sample is obtained by randomly selecting a section, then choosing which category
// from the pair based on a uniform random deviate.
final double sumProb = InternalUtils.validateProbabilities(probabilities);
// Allow zero-padding
final int n = computeSize(probabilities.length, alpha);
// Partition into small and large by splitting on the average.
final double mean = sumProb / n;
// The cardinality of smallSize + largeSize = n.
// So fill the same array from either end.
final int[] indices = new int[n];
int large = n;
int small = 0;
for (int i = 0; i < probabilities.length; i++) {
if (probabilities[i] >= mean) {
indices[--large] = i;
} else {
indices[small++] = i;
}
}
small = fillRemainingIndices(probabilities.length, indices, small);
// This may be smaller than the input length if the probabilities were already padded.
final int nonZeroIndex = findLastNonZeroIndex(probabilities);
// The probabilities are modified so use a copy.
// Note: probabilities are required only up to last nonZeroIndex
final double[] remainingProbabilities = Arrays.copyOf(probabilities, nonZeroIndex + 1);
// Allocate the final tables.
// Probability table may be truncated (when zero padded).
// The alias table is full length.
final long[] probability = new long[remainingProbabilities.length];
final int[] alias = new int[n];
// This loop uses each large in turn to fill the alias table for small probabilities that
// do not reach the requirement to fill an entire section alone (i.e. p < mean).
// Since the sum of the small should be less than the sum of the large it should use up
// all the small first. However floating point round-off can result in
// misclassification of items as small or large. The Vose algorithm handles this using
// a while loop conditioned on the size of both sets and a subsequent loop to use
// unpaired items.
while (large != n && small != 0) {
// Index of the small and the large probabilities.
final int j = indices[--small];
final int k = indices[large++];
// Optimisation for zero-padded input:
// p(j) = 0 above the last nonZeroIndex
if (j > nonZeroIndex) {
// The entire amount for the section is taken from the alias.
remainingProbabilities[k] -= mean;
} else {
final double pj = remainingProbabilities[j];
// Item j is a small probability that is below the mean.
// Compute the weight of the section for item j: pj / mean.
// This is scaled by 2^53 and the ceiling function used to round-up
// the probability to a numerator of a fraction in the range [1,2^53].
// Ceiling ensures non-zero values.
probability[j] = (long) Math.ceil(CONVERT_TO_NUMERATOR * (pj / mean));
// The remaining amount for the section is taken from the alias.
// Effectively: probabilities[k] -= (mean - pj)
remainingProbabilities[k] += pj - mean;
}
// If not j then the alias is k
alias[j] = k;
// Add the remaining probability from large to the appropriate list.
if (remainingProbabilities[k] >= mean) {
indices[--large] = k;
} else {
indices[small++] = k;
}
}
// Final loop conditions to consume unpaired items.
// Note: The large set should never be non-empty but this can occur due to round-off
// error so consume from both.
fillTable(probability, alias, indices, 0, small);
fillTable(probability, alias, indices, large, n);
// Change the algorithm for small power of 2 sized tables
return isSmallPowerOf2(n) ?
new SmallTableAliasMethodDiscreteSampler(rng, probability, alias) :
new AliasMethodDiscreteSampler(rng, probability, alias);
}