def summarize_joint_samples()

in src/mozanalysis/bayesian_stats/__init__.py [0:0]


def summarize_joint_samples(focus, reference, quantiles=DEFAULT_QUANTILES):
    """Return descriptive statistics for uplifts.

    The intended use case of this function is to compare a 'focus'
    experiment branch to a 'reference' experiment branch (e.g. the
    control). Samples from each branch are combined pairwise; these
    pairs are considered to be samples from the joint probability
    distribution (JPD). We compute various quantities from the JPD:

    * We compute summary statistics for the distribution over relative
      uplifts ``focus / reference - 1``
    * We compute summary statistics for the distribution over absolute
      uplifts ``focus - reference``
    * We compute a summary statistic for the distribution over the L1
      norm of absolute uplifts ``abs(focus - reference)``
    * We compute the fraction of probability mass in the region
      ``focus > reference``, which in a Bayesian context may be
      interpreted as the probability that the ground truth model
      parameter is larger for the focus branch than the reference
      branch.

    ``focus`` and ``reference`` are samples from distributions; each is the
    same format that would be supplied to `summarize_one_branch_samples`
    when analyzing the branches independently.

    Can be used to analyse a single metric (supply Series as arguments)
    or in batch mode (supply DataFrames as arguments).

    The mode is chosen from ``focus`` alone: a DataFrame, or a sequence
    whose first element (by position) is not a scalar, is handled in
    batch mode; anything else is handled as a single metric.

    Args:
    -----
        focus (pandas.Series or pandas.DataFrame): Bootstrapped samples
            or samples of a model parameter for a branch of an
            experiment. If a DataFrame, each column represents a
            different quantity.
        reference (pandas.Series or pandas.DataFrame): The same
            quantity, calculated for a different branch (typically the
            control).
        quantiles (list, optional): The quantiles to compute - a good
            reason to override the defaults would be when Bonferroni
            corrections are required.

    Returns:
    --------
        A pandas Series or DataFrame containing a MultiIndex with the
        following labels on the higher level and stringified floats
        on the inner level

        * rel_uplift: Expectation value and quantiles over the relative
          uplift.
        * abs_uplift: Expectation value and quantiles over the absolute
          uplift.
        * max_abs_diff: Quantile 0.95 on the L1 norm of differences/
          absolute uplifts. In a Bayesian context, there is a 95%
          probability that the absolute difference is less than this in
          either direction.
        * prob_win: In a Bayesian context, the probability that the ground
          truth model parameter is larger for the focus than the reference
          branch.

        If returning a DataFrame, this MultiIndex is for the columns, and
        the index matches the columns of ``focus``.

    """
    if isinstance(focus, pd.DataFrame):
        return _summarize_joint_samples_batch(focus, reference, quantiles)

    # Inspect the first sample *by position*. ``Series[0]`` is a label
    # lookup: it raises KeyError when the index has no label 0 (e.g. after
    # filtering) and returns a sub-Series when label 0 is duplicated, which
    # would wrongly select batch mode. Non-Series sequences (ndarray, list)
    # keep plain positional indexing.
    if isinstance(focus, pd.Series):
        first_sample = focus.iloc[0]
    else:
        first_sample = focus[0]

    if not np.isscalar(first_sample):
        return _summarize_joint_samples_batch(focus, reference, quantiles)

    return _summarize_joint_samples_single(focus, reference, quantiles)