in python/prophet/diagnostics.py [0:0]
def rolling_median_by_h(x, h, w, name):
    """Compute a rolling median of x, after first aggregating by h.

    Right-aligned. Computes a single median for each unique value of h. Each
    median is over at least w samples.

    For each h where there are fewer than w samples, we take samples from the
    previous h, moving backwards. (In other words, we ~ assume that the x's
    are shuffled within each h.) The extra samples are the ones located
    immediately before the first occurrence of h in the input order, so the
    inputs are expected to be ordered by h.

    Horizons are processed from the largest to the smallest; as soon as one
    horizon cannot be filled up to w samples, it and all smaller horizons are
    left out of the result.

    Parameters
    ----------
    x: Array (list, numpy array or pandas Series; any index is accepted).
    h: Array of horizon for each value in x.
    w: Integer window size (number of elements).
    name: Name for metric in result dataframe

    Returns
    -------
    Dataframe with columns horizon and name, the rolling median of x.
    """
    # Aggregate over h
    df = pd.DataFrame({'x': x, 'h': h})
    grouped = df.groupby('h')
    df2 = grouped.size().reset_index().sort_values('h')
    hs = df2['h'].tolist()

    # Work on positional views of both columns. Indexing the raw inputs
    # directly would be a label lookup for a pandas Series with a custom
    # index, and comparing a plain list to a scalar yields a single bool
    # instead of an element-wise mask.
    x_values = np.asarray(df['x'])
    h_column = df['h']

    res_h = []
    res_x = []
    # Start from the right (largest horizon) and work backwards
    for h_i in reversed(hs):
        xs = grouped.get_group(h_i).x.tolist()

        # Position just before the first sample that belongs to this horizon
        next_idx_to_add = np.asarray(h_column == h_i).argmax() - 1
        while len(xs) < w and next_idx_to_add >= 0:
            # Include points from the previous horizon. All of them if still
            # less than w, otherwise just enough to get to w.
            xs.append(x_values[next_idx_to_add])
            next_idx_to_add -= 1
        if len(xs) < w:
            # Ran out of points before getting enough.
            break
        res_h.append(h_i)
        res_x.append(np.median(xs))

    # Results were collected from largest to smallest horizon
    res_h.reverse()
    res_x.reverse()
    return pd.DataFrame({'horizon': res_h, name: res_x})