in afa/core.py [0:0]
def run_cv_select(df, horiz, freq, metric="smape", cv_stride=3,
                  cv_periods=None, show_progress=False):
    """Select the best forecasting model for one timeseries via cross-validation.

    Every configuration returned by `create_model_grid()` is evaluated with
    `run_cv`; the results are ranked by the `<metric>_mean` column (ascending)
    and the forecast (`yhat`) of the top-ranked configuration is appended to
    the historical demand.

    Parameters
    ----------
    df : pd.DataFrame
        History of a single timeseries. This function reads the "channel",
        "family", "item_id" and "demand" columns (and "horiz" when `horiz`
        is None), the columns listed in `GROUP_COLS`, and uses the index as
        the timestamps of the observations. Must contain at least one row.
        The frame is not modified.
    horiz : int or None
        Forecast horizon length. When None, it is taken from the "horiz"
        column of the first row.
    freq : str
        Key into `DC_PERIODS` (seasonal period lookup); also used as the
        `freq` of the generated forecast timestamps.
    metric : str
        Metric name; results are sorted by the column `metric + "_mean"`.
    cv_stride : int
        Passed through to `run_cv`.
    cv_periods : int or None
        When given, the sliding windows start at
        `max(1, len(demand) - cv_periods)`; otherwise they start at 1.
    show_progress : bool
        Currently unused; kept for interface compatibility.

    Returns
    -------
    (pd.DataFrame, pd.DataFrame)
        `df_pred`: historical rows (type "actual") followed by forecast rows
        (type "fcast"), restricted to `GROUP_COLS + ["demand", "type"]`.
        `df_results`: the concatenated `run_cv` results, sorted by the metric,
        with "rank", "channel", "family", "item_id" and "horiz" columns added.
    """
    # Check for data before the first `.iloc[0]`; otherwise an empty frame
    # fails there with an IndexError and this check is never reached.
    assert len(df["demand"]) > 0, "`df` must contain at least one row"

    first_row = df.iloc[0]
    if horiz is None:
        horiz = int(first_row["horiz"])
    channel = first_row["channel"]
    family = first_row["family"]
    item_id = first_row["item_id"]

    # these are the model configurations to run
    grid = create_model_grid()

    #
    # decompose sliding windows once
    #
    period = DC_PERIODS[freq]
    y = df["demand"].values

    # The start offset is derived from the *unpadded* history length.
    if cv_periods is None:
        cv_start = 1
    else:
        cv_start = max(1, y.shape[0] - cv_periods)

    y = np.nan_to_num(y)
    if len(y) == 1:
        # left-pad a lone observation so at least one window exists
        y = np.pad(y, [1, 0], constant_values=1)

    dc_dict = {}
    for i in range(cv_start, len(y)):
        try:
            dc = sm.tsa.seasonal_decompose(y[:i], period=period,
                                           two_sided=False)
            yp_seasonal = fourier(dc.seasonal, horiz, freq, seasonal=False)
            dc_dict[i] = (dc.resid, dc.trend, yp_seasonal)
        except Exception:
            # Best effort: a window that cannot be decomposed (presumably
            # because it is too short for `period`) is recorded as None.
            # `Exception` rather than a bare `except` so that
            # KeyboardInterrupt/SystemExit still propagate.
            dc_dict[i] = None

    results = [run_cv(cfg, df, horiz, freq, cv_start, cv_stride,
                      dc_dict=dc_dict, metric=metric) for cfg in grid]
    df_results = pd.concat(results)

    # rank results by the metric
    df_results = df_results.sort_values(by=metric + "_mean", ascending=True)
    df_results["rank"] = np.arange(len(df_results)) + 1
    df_results.insert(0, "channel", channel)
    df_results.insert(1, "family", family)
    df_results.insert(2, "item_id", item_id)
    df_results.insert(3, "horiz", horiz)

    # get the forecast from the best model
    yhat = df_results.iloc[0]["yhat"]

    # Forecast timestamps strictly after the last observation. This mirrors
    # `date_range(..., closed="right")` without relying on the `closed`
    # keyword (removed in pandas 2.0): the first generated stamp is dropped
    # only when it coincides with the last observed timestamp.
    last_ts = pd.Timestamp(df.index.max())
    yhat_ts = pd.date_range(last_ts, periods=len(yhat) + 1, freq=freq)
    if len(yhat_ts) > 0 and yhat_ts[0] == last_ts:
        yhat_ts = yhat_ts[1:]
    yhat_ts = yhat_ts[:len(yhat)]

    assert len(yhat_ts) > 0
    assert len(yhat_ts) == len(yhat), f"{yhat_ts} {yhat}"

    # make the forecast dataframe
    df_pred = pd.DataFrame({"demand": yhat, "timestamp": yhat_ts})
    df_pred.insert(0, "channel", channel)
    df_pred.insert(1, "family", family)
    df_pred.insert(2, "item_id", item_id)
    df_pred.insert(3, "horiz", horiz)
    df_pred["type"] = "fcast"
    df_pred.set_index("timestamp", drop=False, inplace=True)

    # combine historical and predictions dataframes, re-ordering columns;
    # `assign` labels the history on a copy instead of writing a "type"
    # column into the caller's frame.
    out_cols = GROUP_COLS + ["demand", "type"]
    df_hist = df.assign(type="actual")
    df_pred = pd.concat([df_hist[out_cols], df_pred])[out_cols]

    return df_pred, df_results