in afa/app/app.py [0:0]
def make_ml_df_top(df, df_backtests, groupby_cols, dt_start, dt_stop, cperc_thresh, metric):
    """Rank groups by demand over a period and attach their backtest accuracy.

    The demand in ``df`` between ``dt_start`` and ``dt_stop`` (inclusive) is
    summed per group, the groups are sorted by demand (largest first), and
    only the leading groups whose cumulative share of total demand is
    ``<= cperc_thresh`` are kept. Each kept group is joined (left join) with
    an accuracy figure computed from ``df_backtests`` over the same period
    as ``100 * (1 - error)``.

    Parameters
    ----------
    df
        Frame with a ``"demand"`` column and the ``groupby_cols`` columns.
        Its index is filtered against the period under the name
        ``"timestamp"``. NOTE: the index name of the passed-in frame is set
        to ``"timestamp"`` in place (pre-existing side effect, kept for
        backward compatibility).
    df_backtests
        Frame with ``"timestamp"``, ``"target_value"`` and ``"demand"``
        columns plus the ``groupby_cols`` columns. Within each group the
        in-period ``"target_value"`` and ``"demand"`` values are passed, in
        that order, to the metric function.
    groupby_cols
        List of column names defining a group (also joined with ``", "`` to
        label the summary row).
    dt_start, dt_stop
        Period bounds; anything ``pd.Timestamp`` accepts. Both are truncated
        to day resolution (``%Y-%m-%d``) before use.
    cperc_thresh
        Cumulative demand percentage (0-100 scale) up to which groups are
        kept.
    metric
        ``"smape"`` or ``"wape"``.

    Returns
    -------
    (df_grp, df_grp_summary)
        ``df_grp`` has one row per kept group with the columns ``"rank"``,
        the group columns, ``"demand"``, ``"% total demand"`` and
        ``"% accuracy"``. ``df_grp_summary`` is a one-row frame with
        ``"group by"``, ``"demand"`` (sum), ``"% total demand"`` and
        ``"% accuracy"`` (mean, ignoring missing values) over those rows.

    Raises
    ------
    NotImplementedError
        If ``metric`` is not one of the supported names.
    """
    # Resolve the error function once, before touching any data, so an
    # unsupported metric fails fast instead of deep inside the per-group
    # callback (or not at all when there are no backtest groups).
    if metric == "smape":
        calc_error = calc_smape
    elif metric == "wape":
        calc_error = calc_wape
    else:
        raise NotImplementedError(f"unsupported metric: {metric!r}")

    # Day-resolution date strings are used for the demand query; the same
    # truncated dates, parsed once, bound the backtest period.
    dt_start = pd.Timestamp(dt_start).strftime("%Y-%m-%d")
    dt_stop = pd.Timestamp(dt_stop).strftime("%Y-%m-%d")
    period_start = pd.Timestamp(dt_start)
    period_stop = pd.Timestamp(dt_stop)

    def calc_period_metrics(dd):
        """Return the selected error metric for one group's in-period rows."""
        ts = dd["timestamp"]
        in_period = (ts >= period_start) & (ts <= period_stop)
        return calc_error(dd["target_value"][in_period], dd["demand"][in_period])

    # The query below addresses the index by name.
    df.index.name = "timestamp"
    df2 = df.query(f"timestamp >= '{dt_start}' and timestamp <= '{dt_stop}'")
    total_demand = df2["demand"].sum()

    # calculate per-group demand %
    # ("sum" by name: passing the builtin is deprecated in recent pandas)
    df_grp_demand = (
        df2.groupby(groupby_cols, as_index=False, sort=False)
        .agg({"demand": "sum"})
    )
    df_grp_demand["perc"] = df_grp_demand["demand"] / total_demand * 100

    # per-group error over the period; the scalar returned by the callback
    # lands in a column labelled None, which is renamed to the metric name
    df_grp_metrics = (
        df_backtests.groupby(groupby_cols, as_index=False, sort=False)
        .apply(calc_period_metrics)
        .rename({None: metric}, axis=1)
    )
    df_grp_metrics["accuracy"] = 100 * (1 - df_grp_metrics[metric])
    df_grp_metrics = df_grp_metrics.drop(columns=metric)

    # combine, sort by demand, and keep the groups within the cumulative
    # demand threshold
    df_grp = (
        df_grp_demand
        .merge(df_grp_metrics, on=groupby_cols, how="left")
        .sort_values(by="demand", ascending=False)
    )
    df_grp["cperc"] = df_grp["perc"].cumsum()
    df_grp = (
        df_grp.query(f"cperc <= {cperc_thresh}")
        .drop(columns="cperc")
        .rename(columns={"perc": "% total demand", "accuracy": "% accuracy"})
    )

    # calc. summary row (from the unrounded per-group values)
    df_grp_summary = df_grp.agg({"demand": "sum", "% accuracy": "mean"})
    df_grp_summary["% total demand"] = 100 * df_grp_summary["demand"] / total_demand
    df_grp_summary = pd.DataFrame(df_grp_summary).T[["demand", "% total demand", "% accuracy"]]
    df_grp_summary.insert(0, "group by", ", ".join(groupby_cols))
    df_grp_summary["demand"] = df_grp_summary["demand"].round(0)
    df_grp_summary["% total demand"] = df_grp_summary["% total demand"].round(1)
    df_grp_summary["% accuracy"] = df_grp_summary["% accuracy"].round(0)

    # round the per-group figures for display and number the rows
    df_grp["demand"] = df_grp["demand"].round(0)
    df_grp["% total demand"] = df_grp["% total demand"].round(1)
    df_grp["% accuracy"] = df_grp["% accuracy"].round(0)
    df_grp.insert(0, "rank", np.arange(df_grp.shape[0]) + 1)

    return df_grp, df_grp_summary