in afa/core.py [0:0]
import numpy as np
import pandas as pd


def make_perf_summary(df_results, metric="smape"):
"""Generate the dataframe summarizing the overall performance, primarily
for use in the UI:
- distribution of model types that were selected for each timeseries
- errors for the best models
- errors for the naive models
Returns
-------
pd.DataFrame, pd.Series, pd.Series
"""
df_best = df_results.query("rank == 1")
    # tabulate the distribution of model types selected as best across the
    # entire dataset, keeping model types that were never selected
    df_model_dist = (
        pd.DataFrame({"model_type": df_results["model_type"].unique()})
        .merge(df_best["model_type"]
                   .value_counts(normalize=True)
                   .rename("perc")
                   .rename_axis("model_type")
                   .reset_index(),
               how="left", on="model_type")
        .merge(df_best["model_type"]
                   .value_counts()
                   .rename("freq")
                   .rename_axis("model_type")
                   .reset_index(),
               how="left", on="model_type")
    )
    df_model_dist["perc"] = df_model_dist["perc"].fillna(0.) * 100.
    df_model_dist["freq"] = df_model_dist["freq"].fillna(0.)
    # get the error metrics for the best naive model of each timeseries
    # (GROUP_COLS lists the columns that identify a single timeseries)
    df_best_naive = (
        df_results.query("model_type == 'naive' and params == 'naive'")
                  .sort_values(by=["rank"])
                  .groupby(GROUP_COLS, as_index=False, sort=False)
                  .first()
    )
    naive_errs = np.hstack(df_best_naive[metric])
    naive_err = pd.Series({"err_mean": np.nanmean(naive_errs).round(4),
                           "err_median": np.nanmedian(naive_errs).round(4),
                           "err_std": np.nanstd(naive_errs).round(4)})
    # summarize the same metrics for the best models overall, for comparison
    # against the naive baseline
    best_errs = np.hstack(df_best[metric])
    best_err = pd.Series({"err_mean": np.nanmean(best_errs).round(4),
                          "err_median": np.nanmedian(best_errs).round(4),
                          "err_std": np.nanstd(best_errs).round(4)})
return df_model_dist, best_err, naive_err
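
A minimal usage sketch, not taken from the repository: it assumes make_perf_summary is defined in the same session, that GROUP_COLS is ["ts_id"], and that each metric cell holds an array of error values; the column names other than those referenced in the function, and all values below, are invented for illustration.

import numpy as np
import pandas as pd

GROUP_COLS = ["ts_id"]  # assumed grouping columns for this sketch only
df_results = pd.DataFrame({
    "ts_id": ["A", "A", "B", "B"],
    "model_type": ["ets", "naive", "naive", "arima"],
    "params": ["add-err", "naive", "naive", "(1, 1, 1)"],
    "rank": [1, 2, 1, 2],
    # hypothetical per-horizon error values for each model/timeseries pair
    "smape": [np.array([8.0, 9.5]), np.array([12.0, 15.0]),
              np.array([10.0, 11.0]), np.array([14.0, 13.0])],
})

df_model_dist, best_err, naive_err = make_perf_summary(df_results, metric="smape")
print(df_model_dist)  # selection freq/perc per model type
print(best_err)       # err_mean / err_median / err_std for the best models
print(naive_err)      # the same summary for the naive baseline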