def rolling_mean_by_h()

in python/prophet/diagnostics.py [0:0]


def rolling_mean_by_h(x, h, w, name):
    """Right-aligned rolling mean of x across horizons, windowed by sample count.

    The values in x are first collapsed to one mean (and one sample count)
    per distinct horizon in h. Then, for each horizon taken from the largest
    down, a window is built from that horizon's samples; if it holds fewer
    than w samples, samples are borrowed from successively smaller horizons
    until the window holds w. A horizon that is only partly needed
    contributes its group mean, weighted by the number of samples taken
    from it.

    Horizons that cannot reach w samples even after using every smaller
    horizon are omitted from the result.

    Parameters
    ----------
    x: Array of values to average.
    h: Array giving the horizon of each value in x.
    w: Integer window size (minimum number of samples per mean).
    name: Column name to use for the rolling mean in the result.

    Returns
    -------
    Dataframe with columns horizon and name, ordered by increasing horizon,
    holding one rolling mean per retained horizon.
    """
    # One row per distinct horizon: mean and sample count of x, sorted by h.
    per_horizon = (
        pd.DataFrame({'x': x, 'h': h})
        .groupby('h')
        .agg(['mean', 'count'])
        .reset_index()
        .sort_values('h')
    )
    means = per_horizon['x']['mean'].values
    counts = per_horizon['x']['count'].values
    horizons = per_horizon['h'].values

    # Results are collected from the largest horizon downwards.
    kept_h = []
    kept_x = []
    for right in range(len(horizons) - 1, -1, -1):
        total = int(counts[right])
        avg = float(means[right])
        # Widen the window to the left until it holds w samples.
        for left in range(right - 1, -1, -1):
            if not (total < w):
                break
            # Take the whole horizon if that still leaves us short of w,
            # otherwise only as many samples as are missing.
            take = min(w - total, counts[left])
            avg = (
                avg * (total / (total + take))
                + means[left] * (take / (total + take))
            )
            total += take
        if total < w:
            # Not enough samples remain; smaller horizons have even fewer
            # to draw from, so stop here.
            break
        kept_h.append(horizons[right])
        kept_x.append(avg)

    # Flip back to increasing-horizon order for the output.
    return pd.DataFrame({'horizon': kept_h[::-1], name: kept_x[::-1]})