in afa/app/app.py [0:0]
def download_afc_files():
    """Load the forecast, accuracy and backtest CSV exports for the report.

    The S3 export location and file prefix are read from the status JSON
    referenced by ``state.report["afc"]["status_json_s3_path"]``; the three
    CSV locations are derived from those two values.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame, pandas.DataFrame)
        ``df_preds``
            Rows of the ``*_processed.csv`` export tagged ``type="fcast"``,
            followed by the actual demand rows tagged ``type="actual"``.
            The ``channel``, ``family`` and ``item_id`` columns are
            upper-cased.
        ``df_results``
            The accuracy-metrics export, with its composite ``item_id``
            split on ``"@@"`` into ``channel``, ``family`` and ``item_id``.
        ``df_backtests``
            The backtest export reduced to ``timestamp``, ``channel``,
            ``family``, ``item_id``, ``demand``, ``p10``, ``p90`` and
            ``target_value``, sorted by series and timestamp.

    Notes
    -----
    Relies on the module-level ``state`` object and on the helpers
    ``parse_s3_json`` and ``get_df_resampled`` defined elsewhere.
    """
    id_cols = ["channel", "family", "item_id"]
    id_dtypes = dict.fromkeys(id_cols, str)

    df = state["report"]["data"]["df"]
    status_dict = parse_s3_json(state.report["afc"]["status_json_s3_path"])
    s3_export_path = status_dict["s3_export_path"]
    prefix = status_dict["prefix"]
    horiz = state["report"]["afc"]["horiz"]
    freq = state["report"]["afc"]["freq"]

    preds_s3_prefix = \
        f'{s3_export_path}/{prefix}/{prefix}_processed.csv'
    results_s3_prefix = \
        f'{s3_export_path}/{prefix}/accuracy-metrics-values/Accuracy_{prefix}_*.csv'
    backtests_s3_prefix = \
        f'{s3_export_path}/{prefix}/forecasted-values/Forecasts_{prefix}_BacktestExportJob_*.csv'

    # --- forecasts -------------------------------------------------------
    _df_preds = wr.s3.read_csv(preds_s3_prefix, dtype=id_dtypes)

    _preds = []
    for _, dd in _df_preds.groupby(id_cols, as_index=False, sort=False):
        # Reassign instead of sorting in place: the group is derived from
        # `_df_preds`, and in-place mutation triggers SettingWithCopy warnings.
        dd = dd.sort_values(by="timestamp", ascending=True)

        # A series with more rows than the horizon has its earliest row
        # dropped.
        # NOTE(review): presumably that extra row is a leading non-forecast
        # period -- confirm against the export format.
        if dd.shape[0] > horiz:
            dd = dd.iloc[1:, :]

        _preds.append(dd)

    df_preds = pd.concat(_preds)
    df_preds["type"] = "fcast"
    df_preds["timestamp"] = pd.DatetimeIndex(df_preds["timestamp"])

    # --- actuals ---------------------------------------------------------
    # Use "df2" from the report state when present, else resample the raw data.
    df_actual = state["report"]["data"].get("df2", None)

    if df_actual is None:
        df_actual = get_df_resampled(df, freq)

    df_actual = (
        df_actual
        .reset_index()
        .rename({"index": "timestamp"}, axis=1)
        .assign(type='actual')
    )

    # `DataFrame.append` was removed in pandas 2.0; `pd.concat` has the same
    # defaults (original indexes kept, columns not sorted).
    df_preds = pd.concat([df_preds, df_actual])

    for col in id_cols:
        df_preds[col] = df_preds[col].str.upper()

    # --- accuracy metrics ------------------------------------------------
    df_results = wr.s3.read_csv(results_s3_prefix, dtype=id_dtypes)
    # The exported item_id is a composite "channel@@family@@item_id" key.
    df_results[id_cols] = df_results["item_id"].str.split("@@", expand=True)

    # --- backtests -------------------------------------------------------
    df_backtests = wr.s3.read_csv(backtests_s3_prefix)
    df_backtests[id_cols] = \
        df_backtests["item_id"].str.split("@@", expand=True)
    df_backtests["timestamp"] = \
        pd.DatetimeIndex(df_backtests["backtestwindow_end_time"])

    # Negative quantile values are floored at zero; the p50 quantile is
    # additionally rounded and exposed as "demand".
    df_backtests["p10"] = np.clip(df_backtests["p10"], 0, None)
    df_backtests["demand"] = \
        np.round(np.clip(df_backtests["p50"], 0, None), 0)
    df_backtests["target_value"] = df_backtests["target_value"].round(0)

    # Non-inplace sort: sorting a column-selected slice in place raises
    # SettingWithCopy warnings.
    df_backtests = (
        df_backtests[["timestamp", "channel", "family", "item_id",
                      "demand", "p10", "p90", "target_value"]]
        .sort_values(by=["channel", "family", "item_id", "timestamp"])
    )

    return df_preds, df_results, df_backtests