in orbit/diagnostics/backtest.py [0:0]
def _set_split_scheme(self):
    """Compute the train/test split layout and cache it on the instance.

    One split is produced for every training end index in
    ``range(min_train_len - 1, _full_len - forecast_len, incremental_len)``.
    Each split is stored under its ordinal position and holds the train
    and test index ranges.  When ``date_col`` is not ``None`` the matching
    ``train_period`` and ``test_period`` pairs, looked up in ``dt_array``,
    are stored as well.

    With a rolling window the training range always spans ``min_train_len``
    points; for any other window type it starts at index 0.

    The result is written to ``_split_scheme`` and ``n_splits`` is
    overwritten with the number of splits actually generated.
    """
    # Training end index of the first split, and the exclusive stop value
    # that leaves room for a full forecast horizon after the last split.
    first_train_end = self.min_train_len - 1
    train_end_stop = self._full_len - self.forecast_len
    train_ends = range(first_train_end, train_end_stop, self.incremental_len)

    scheme = {}
    # Conventions used below:
    #   * index ranges are half-open: left bound inclusive, right exclusive
    #   * date periods are closed on both ends, to work around the
    #     limitation on df[date_col][idx]
    for split_no, last_train in enumerate(train_ends):
        if self.window_type == TimeSeriesSplitSchemeKeys.SPLIT_TYPE_ROLLING.value:
            # fixed-width window that slides forward with the end index
            first_train = last_train - self.min_train_len + 1
        else:
            # window anchored at the very first observation
            first_train = 0

        entry = {
            TimeSeriesSplitSchemeKeys.TRAIN_IDX.value: range(
                first_train, last_train + 1
            ),
            TimeSeriesSplitSchemeKeys.TEST_IDX.value: range(
                last_train + 1, last_train + self.forecast_len + 1
            ),
        }

        if self.date_col is not None:
            entry["train_period"] = (
                self.dt_array[first_train],
                self.dt_array[last_train],
            )
            entry["test_period"] = (
                self.dt_array[last_train + 1],
                self.dt_array[last_train + self.forecast_len],
            )

        scheme[split_no] = entry

    self._split_scheme = scheme
    # keep n_splits in sync with the scheme, since the number of splits may
    # be dictated by min_train_len rather than by the requested value
    self.n_splits = len(scheme)