in python/prophet/diagnostics.py [0:0]
def rolling_mean_by_h(x, h, w, name):
    """Right-aligned rolling mean of x, computed over horizon groups.

    The values in x are first collapsed to one mean and one sample count per
    distinct value of h. Then, for each horizon, a mean over at least w
    samples is assembled from that horizon's samples plus, when needed,
    samples borrowed from the next smaller horizons. Horizons for which w
    samples cannot be gathered are left out of the result.

    Parameters
    ----------
    x: Array of values to average.
    h: Array giving the horizon of each value in x.
    w: Integer window size (minimum number of samples per mean).
    name: Column name used for the metric in the result dataframe.

    Returns
    -------
    Dataframe with columns horizon and name, holding the rolling mean of x
    for every horizon that reached w samples, in ascending horizon order.
    """
    # Collapse x to a (mean, count) pair per distinct horizon, ascending in h.
    frame = pd.DataFrame({'x': x, 'h': h})
    per_h = frame.groupby('h').agg(['mean', 'count'])
    per_h = per_h.reset_index().sort_values('h')
    x_stats = per_h['x']
    means = x_stats['mean'].values
    counts = x_stats['count'].values
    horizons = per_h['h'].values

    out_h = []
    out_x = []
    # Visit horizons from largest to smallest; each window extends leftwards.
    for i in range(len(horizons) - 1, -1, -1):
        n = int(counts[i])
        avg = float(means[i])
        prev = i - 1
        while prev >= 0 and n < w:
            # Borrow from the previous horizon: everything it has if that
            # still leaves us short of w, otherwise only the shortfall.
            take = min(w - n, counts[prev])
            total = n + take
            # Merge the two means, weighting each by its sample count.
            avg = avg * (n / total) + means[prev] * (take / total)
            n = total
            prev -= 1
        if n < w:
            # No horizons left to borrow from; every smaller horizon would
            # come up short as well, so stop here.
            break
        out_h.append(horizons[i])
        out_x.append(avg)

    # Results were collected right-to-left; flip them back to ascending h.
    return pd.DataFrame({'horizon': out_h[::-1], name: out_x[::-1]})