in orbit/diagnostics/backtest.py [0:0]
def fit_predict(self):
    """Fit a fresh copy of the model on every split and collect predictions.

    For each ``(train_df, test_df, scheme, key)`` tuple yielded by the
    splitter, a deep copy of ``self.model`` is fitted on the training frame
    and then used to predict both the training and the test frame. The
    fitted copy, the split scheme and the actual/predicted arrays are
    appended to the accumulators already held on the instance, and a
    stacked frame (dates, actuals, prediction columns, train flag, split
    key) covering all splits is stored in ``self._predicted_df``.

    Fitting/predicting is kept separate from scoring because it is
    generally the most expensive part of backtesting.

    Returns
    -------
    None
    """
    splitter = self._splitter
    base_model = self.model
    response_col = base_model.response_col
    date_col = base_model.date_col

    # Output column names, looked up once instead of on every iteration.
    date_key = BacktestFitKeys.DATE.value
    actual_key = BacktestFitKeys.ACTUAL.value
    predicted_key = BacktestFitKeys.PREDICTED.value
    train_flag_key = BacktestFitKeys.TRAIN_FLAG.value
    split_key = BacktestFitKeys.SPLIT_KEY.value

    def _labelled_frame(source_df, prediction_df, pred_cols, is_train):
        # Put date, actual response and prediction columns side by side,
        # then tag the rows as train or test.
        # NOTE(review): the column-wise concat aligns on index, so this
        # presumably relies on source_df and prediction_df sharing the
        # same index -- confirm against the splitter / model.predict.
        dates = source_df[date_col].rename(date_key)
        actuals = source_df[response_col].rename(actual_key)
        joined = pd.concat([dates, actuals, prediction_df[pred_cols]], axis=1)
        joined[train_flag_key] = is_train
        return joined

    split_frames = []
    for train_df, test_df, scheme, key in splitter.split():
        # Work on a copy so self.model itself is never fitted.
        fitted = deepcopy(base_model)
        fitted.fit(train_df)
        train_pred = fitted.predict(train_df)
        test_pred = fitted.predict(test_df)

        # Every prediction column except the date column.
        pred_cols = [col for col in train_pred.columns if col != date_col]

        # Accumulate per-split artefacts on the instance.
        self._fitted_models.append(fitted)
        self._splitter_scheme.append(scheme)

        test_actual = test_df[response_col].to_numpy()
        self._test_actual = np.concatenate([self._test_actual, test_actual])

        test_point = test_pred[predicted_key].to_numpy()
        self._test_prediction = np.concatenate(
            [self._test_prediction, test_point]
        )

        train_actual = train_df[response_col].to_numpy()
        self._train_actual = np.concatenate([self._train_actual, train_actual])

        train_point = train_pred[predicted_key].to_numpy()
        self._train_prediction = np.concatenate(
            [self._train_prediction, train_point]
        )

        # Build the per-split frame: train rows first, then test rows.
        train_part = _labelled_frame(train_df, train_pred, pred_cols, True)
        test_part = _labelled_frame(test_df, test_pred, pred_cols, False)
        split_frame = pd.concat([train_part, test_part], axis=0)
        split_frame[split_key] = key
        split_frames.append(split_frame)

    # Stack all splits and renumber the rows from zero.
    stacked = pd.concat(split_frames, axis=0)
    self._predicted_df = stacked.reset_index(drop=True)