def run_cv_select()

in afa/core.py [0:0]
58 lines of code
10 McCabe index (conditional complexity)

def run_cv_select(df, horiz, freq, metric="smape", cv_stride=3,
    cv_periods=None, show_progress=False):
    """Select the best forecasting model for one timeseries via cross-validation.

    Every configuration returned by `create_model_grid()` is evaluated with
    `run_cv`; the concatenated results are ranked by the `<metric>_mean`
    column (ascending) and the forecast (`yhat`) of the top-ranked row is
    appended to the historical demand.

    Parameters
    ----------
    df : pd.DataFrame
        History of a single timeseries. This function reads the "demand",
        "channel", "family" and "item_id" columns (and "horiz" when `horiz`
        is None) and uses `df.index.max()` as the last observed timestamp.
        The frame is not modified.
    horiz : int or None
        Forecast horizon length. When None, it is taken from the "horiz"
        value of the first row of `df`.
    freq : str
        Key into `DC_PERIODS` (seasonal decomposition period); also passed
        to `pd.date_range` as the frequency of the forecast timestamps.
    metric : str
        Metric name; results are ranked on the `metric + "_mean"` column.
    cv_stride : int
        Passed through to `run_cv`.
    cv_periods : int or None
        When given, the sliding windows start at
        `max(1, len(demand) - cv_periods)`; otherwise they start at 1.
    show_progress : bool
        Currently unused; kept for interface compatibility.

    Returns
    -------
    (df_pred, df_results)
        `df_pred` holds the historical rows (type "actual") followed by the
        forecast rows (type "fcast"), restricted to
        `GROUP_COLS + ["demand", "type"]`. `df_results` holds the ranked
        cross-validation results with a 1-based "rank" column and the
        "channel", "family", "item_id" and "horiz" identifier columns.

    Raises
    ------
    ValueError
        If `df` has no rows.

    """
    # Validate before the first `df.iloc[0]`; an empty frame would otherwise
    # fail there with an opaque IndexError.
    if len(df) == 0:
        raise ValueError("`df` must contain at least one demand observation")

    first_row = df.iloc[0]

    if horiz is None:
        horiz = int(first_row["horiz"])

    channel = first_row["channel"]
    family = first_row["family"]
    item_id = first_row["item_id"]

    # these are the model configurations to run
    grid = create_model_grid()

    #
    # decompose sliding windows once
    #
    period = DC_PERIODS[freq]

    y = np.nan_to_num(df["demand"].values)

    # NOTE(review): the original also computed a clamped `cv_horiz`
    # (len(y) - 1 when horiz >= len(y)) but never used it; `run_cv` has
    # always received `horiz`. Confirm whether the clamp was meant to apply.

    # cv_start is derived from the history length *before* any padding below
    if cv_periods is None:
        cv_start = 1
    else:
        cv_start = max(1, len(y) - cv_periods)

    # a single observation is left-padded with a 1 so that the window loop
    # below runs at least once
    if len(y) == 1:
        y = np.pad(y, [1, 0], constant_values=1)

    dc_dict = {}

    for i in range(cv_start, len(y)):
        try:
            dc = sm.tsa.seasonal_decompose(y[:i], period=period,
                                           two_sided=False)
            yp_seasonal = fourier(dc.seasonal, horiz, freq, seasonal=False)
            vals = (dc.resid, dc.trend, yp_seasonal)
        except Exception:
            # best-effort: a window whose decomposition (or seasonal
            # extrapolation) fails is recorded as None for `run_cv`
            vals = None

        dc_dict[i] = vals

    results = [run_cv(cfg, df, horiz, freq, cv_start, cv_stride,
                      dc_dict=dc_dict, metric=metric) for cfg in grid]

    df_results = pd.concat(results)

    # rank results by the metric (lower is better)
    df_results.sort_values(by=metric + "_mean", ascending=True, inplace=True)
    df_results["rank"] = np.arange(1, len(df_results) + 1)

    df_results.insert(0, "channel", channel)
    df_results.insert(1, "family", family)
    df_results.insert(2, "item_id", item_id)
    df_results.insert(3, "horiz", horiz)

    # get the forecast from the best model
    yhat = df_results.iloc[0]["yhat"]

    # Forecast timestamps: the first len(yhat) periods strictly after the
    # last observation. Filtering on `> last_ts` is equivalent to the former
    # `closed="right"` argument, which pandas 2.0 removed.
    last_ts = df.index.max()
    yhat_ts = pd.date_range(last_ts, periods=len(yhat) + 1, freq=freq)
    yhat_ts = yhat_ts[yhat_ts > last_ts][:len(yhat)]

    assert len(yhat_ts) > 0
    assert len(yhat_ts) == len(yhat), f"{yhat_ts} {yhat}"

    # make the forecast dataframe
    df_pred = pd.DataFrame({"demand": yhat, "timestamp": yhat_ts})

    df_pred.insert(0, "channel", channel)
    df_pred.insert(1, "family", family)
    df_pred.insert(2, "item_id", item_id)
    df_pred.insert(3, "horiz", horiz)

    df_pred["type"] = "fcast"
    df_pred.set_index("timestamp", drop=False, inplace=True)

    # tag the history on a copy so the caller's frame is left untouched
    df_actual = df.assign(type="actual")

    # combine historical and predictions dataframes, re-ordering columns
    # (pd.concat replaces DataFrame.append, which pandas 2.0 removed)
    out_cols = GROUP_COLS + ["demand", "type"]
    df_pred = pd.concat([df_actual[out_cols], df_pred])[out_cols]

    return df_pred, df_results