in src/mozanalysis/bayesian_stats/bayesian_bootstrap.py [0:0]
def make_bb_quantile_closure(quantiles):
    """Return a function to calculate quantiles for a bootstrap replicate.

    Args:
        quantiles (float, list of floats): Quantiles to compute

    Returns a function that calculates quantiles for a bootstrap replicate:

        Args:
            values (pd.Series, ndarray):
                One dimensional array of observed values, assumed to be
                sorted in ascending order (as produced by ``np.unique``).
            prob_weights (pd.Series, ndarray):
                Equally shaped array of the probability weight associated with
                each value.

        Returns:
            * A quantile as a numpy scalar, or
            * several quantiles as a dict keyed by the quantiles

        Raises:
            IndexError: if a requested quantile is larger than every entry
                of the cumulative probability weights (plus tolerance).
    """
    # If https://github.com/numpy/numpy/pull/9211/ is ever merged then
    # we can just use that instead.
    # NOTE(review): recent NumPy releases appear to accept a `weights`
    # argument in `np.quantile` for `method="inverted_cdf"` - confirm
    # whether that could replace this helper.

    def get_value_at_quantile(values, shifted_cdf, quantile):
        """Return the value at a quantile.

        Does no interpolation because our Bayesian bootstrap
        implementation calls `np.unique` to tally the values:
        if it did not take this shortcut then regardless of whether
        we interpolate when returning quantiles, the vast majority
        of quantiles would coincide with a value. But since we take
        this shortcut, interpolation mostly returns values not in the
        dataset. Ergh.

        Args:
            values (ndarray): One dimensional array of observed values.
            shifted_cdf (ndarray): Cumulative probability weights with
                the numerical tolerance already added.
            quantile (float): The quantile to look up.

        Returns:
            The first element of ``values`` whose shifted cumulative
            weight is strictly greater than ``quantile``.
        """
        candidates = np.nonzero(quantile < shifted_cdf)[0]
        if candidates.size == 0:
            # Same exception type as the bare `[0][0]` lookup would give,
            # but with a message that explains what went wrong.
            raise IndexError(
                "quantile {!r} is not covered by the cumulative "
                "probability weights".format(quantile)
            )
        return values[candidates[0]]

    def bb_quantile(values, prob_weights):
        """Calculate quantiles for a bootstrap replicate.

        Args:
            values (pd.Series, ndarray): One dimensional array
                of observed values
            prob_weights (pd.Series, ndarray): Equally shaped
                array of the probability weight associated with
                each value.

        Returns:
            * A quantile as a numpy scalar, or
            * several quantiles as a dict keyed by the quantiles
        """
        # Work on plain ndarrays so that lookups are positional: indexing
        # a pd.Series with an integer is label-based and would pick the
        # wrong element (or raise KeyError) for a non-default index.
        values = np.asarray(values)

        # assume values is previously sorted, as per np.unique()
        cdf = np.cumsum(np.asarray(prob_weights))

        # Add a tolerance of 1e-6 to account for numerical error when
        # computing the cdf. Done once here rather than per quantile.
        shifted_cdf = cdf + 1e-6

        if np.isscalar(quantiles):
            return get_value_at_quantile(values, shifted_cdf, quantiles)

        return {
            q: get_value_at_quantile(values, shifted_cdf, q)
            for q in quantiles
        }

    return bb_quantile