in orbit/diagnostics/backtest.py [0:0]
def split(self):
    """Lazily generate one (train, test) pair of data frames per split scheme.

    Rows are selected in one of two ways, decided once before iteration
    starts:

    * ``self.date_col`` is None: rows are taken by position via ``iloc``,
      using the train/test index entries stored in the scheme.
    * otherwise: rows are taken where ``self.df[self.date_col]`` lies
      within the scheme's "train_period" / "test_period" bounds, both
      ends inclusive.

    Every yielded frame has its index reset (old index dropped).

    Yields
    ------
    train_df : pd.DataFrame
        data split for training
    test_df : pd.DataFrame
        data split for testing/validation
    scheme : dict
        the value taken from ``self._split_scheme`` for this iteration
    split_key
        the matching key of ``self._split_scheme``
    """
    # The selection mode is fixed up front, before the scheme is iterated.
    select_by_position = self.date_col is None

    for split_key, scheme in self._split_scheme.items():
        if select_by_position:
            train_rows = scheme[TimeSeriesSplitSchemeKeys.TRAIN_IDX.value]
            train_df = self.df.iloc[train_rows, :].reset_index(drop=True)

            test_rows = scheme[TimeSeriesSplitSchemeKeys.TEST_IDX.value]
            test_df = self.df.iloc[test_rows, :].reset_index(drop=True)
        else:
            dates = self.df[self.date_col]

            # Both period bounds are inclusive.
            train_period = scheme["train_period"]
            in_train = (dates >= train_period[0]) & (dates <= train_period[1])
            train_df = self.df.loc[in_train, :].reset_index(drop=True)

            test_period = scheme["test_period"]
            in_test = (dates >= test_period[0]) & (dates <= test_period[1])
            test_df = self.df.loc[in_test, :].reset_index(drop=True)

        yield train_df, test_df, scheme, split_key