private static FastLoadedDiceRollerDiscreteSampler createSampler()

in commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/FastLoadedDiceRollerDiscreteSampler.java [652:705]


    private static FastLoadedDiceRollerDiscreteSampler createSampler(UniformRandomProvider rng,
                                                                     long[] frequencies,
                                                                     int[] indices,
                                                                     long m) {
        // ALGORITHM 5: PREPROCESS
        // a == frequencies
        // m = sum(a)
        // h = leaf node count
        // H = leaf node label (lH)

        final int n = frequencies.length;

        // k = ceil(log2(m))
        final int k = 64 - Long.numberOfLeadingZeros(m - 1);
        // r = a(n+1) = 2^k - m
        final long r = (1L << k) - m;

        // Note:
        // A sparse matrix can often be used for H, as most of its entries are empty.
        // This implementation uses a 1D array for efficiency at the cost of memory.
        // This is limited to approximately ((2^31 - 1) / k), e.g. 34087042 when the sum of
        // observations is large enough to create k=63.
        // This could be handled using a 2D array. In practice a number of categories this
        // large is not expected and is currently not supported.
        final int[] h = new int[k];
        final int[] lH = new int[checkArraySize((n + 1L) * k)];
        Arrays.fill(lH, NO_LABEL);

        int d;
        for (int j = 0; j < k; j++) {
            final int shift = (k - 1) - j;
            final long bitMask = 1L << shift;

            d = 0;
            for (final int i : indices) {
                // bool w ← (a[i] >> (k − 1) − j)) & 1
                // h[j] = h[j] + w
                // if w then:
                if ((frequencies[i] & bitMask) != 0) {
                    h[j]++;
                    // H[d][j] = i
                    lH[d * k + j] = i;
                    d++;
                }
            }
            // process a(n+1) without extending the input frequencies array by 1
            if ((r & bitMask) != 0) {
                h[j]++;
                lH[d * k + j] = n;
            }
        }

        return new FLDRSampler(rng, n, k, h, lH);
    }