def make_bb_quantile_closure()

in src/mozanalysis/bayesian_stats/bayesian_bootstrap.py [0:0]


def make_bb_quantile_closure(quantiles):
    """Return a function to calculate quantiles for a bootstrap replicate.

    Args:
        quantiles (float, list of floats): Quantiles to compute

    Returns a function that calculates quantiles for a bootstrap replicate:

        Args:

            values (pd.Series, ndarray):
                One dimensional array of observed values, assumed to be
                already sorted (as produced by ``np.unique``).
            prob_weights (pd.Series, ndarray):
                Equally shaped array of the probability weight associated with
                each value.

        Returns:

            * A quantile as a scalar taken from ``values``, or
            * several quantiles as a dict keyed by the quantiles

        Raises:

            IndexError: if no entry of the cumulative weights reaches the
                requested quantile (e.g. empty input).

    """

    # If https://github.com/numpy/numpy/pull/9211/ is ever merged then
    # we can just use that instead.

    # Tolerance that accounts for numerical error when computing the cdf.
    cdf_tolerance = 1e-6

    # Whether one quantile or several were requested does not change
    # between replicates, so decide it once here.
    single_quantile = np.isscalar(quantiles)

    def get_value_at_quantile(values, cdf, quantile):
        """Return the value at a quantile.

        Does no interpolation because our Bayesian bootstrap
        implementation calls `np.unique` to tally the values:
        if it did not take this shortcut then regardless of whether
        we interpolate when returning quantiles, the vast majority
        of quantiles would coincide with a value. But since we take
        this shortcut, interpolation mostly returns values not in the
        dataset. Ergh.

        ``values`` and ``cdf`` must be ndarrays: the lookup below is
        positional.
        """
        # Positions whose cumulative weight (plus tolerance) exceeds the
        # quantile; the first such position holds the answer.
        candidates = np.nonzero(quantile < cdf + cdf_tolerance)[0]
        if candidates.size == 0:
            raise IndexError(
                "quantile {!r} is not reached by the cumulative "
                "probability weights".format(quantile)
            )
        return values[candidates[0]]

    def bb_quantile(values, prob_weights):
        """Calculate quantiles for a bootstrap replicate.

        Args:
            values (pd.Series, ndarray): One dimensional array
                of observed values
            prob_weights (pd.Series, ndarray): Equally shaped
                array of the probability weight associated with
                each value.

        Returns:

            * A quantile as a scalar taken from ``values``, or
            * several quantiles as a dict keyed by the quantiles

        Raises:
            IndexError: if no entry of the cumulative weights reaches a
                requested quantile (e.g. empty input).
        """
        # Work on plain ndarrays so that the positional index computed
        # from the cdf is never misread as a pandas index *label*.
        values = np.asarray(values)

        # assume values is previously sorted, as per np.unique()
        cdf = np.cumsum(np.asarray(prob_weights))

        if single_quantile:
            return get_value_at_quantile(values, cdf, quantiles)

        return {q: get_value_at_quantile(values, cdf, q) for q in quantiles}

    return bb_quantile