in kats/models/globalmodel/backtester.py [0:0]
def run_backtest(self) -> pd.DataFrame:
    """Run the backtester over every timestamp in ``self.backtest_timestamp``.

    The keys of ``self.data`` (positional indices when ``self.data`` is not a
    dict) are shuffled once; the first ``self.test_size`` keys form the test
    set and the remaining keys form the training set. For each backtest
    timestamp, the models in ``self.gm_collects[bt]`` are fitted -- one after
    another, or through a worker pool when ``self.multi`` is true -- and then
    evaluated on the test series.

    Side effects:
        * ``self.test_ids`` is set to the keys selected for testing.
        * ``self.gm_info_collects[bt]`` receives the values returned by
          ``self._fit_single_gm`` (appended in sequential mode, replaced in
          pool mode).
        * ``self.bt_info`` is set to a dict mapping each backtest timestamp to
          the number of train/test series returned by ``self._filter``.

    Returns:
        A `pandas.DataFrame` object representing the backtest errors: the
        concatenation of the per-timestamp evaluation results, each tagged
        with a ``backtest_ts`` column.
    """
    data = self.data
    n = len(data)

    # Train/test split on a random permutation of the series keys.
    keys = np.array(list(data.keys())) if isinstance(data, Dict) else np.arange(n)
    np.random.shuffle(keys)
    all_test_TSs = {keys[i]: data[keys[i]] for i in range(self.test_size)}
    all_train_TSs = {keys[i]: data[keys[i]] for i in range(self.test_size, n)}
    self.test_ids = keys[: self.test_size]

    bt_info = {}
    # Total number of models fitted per backtest timestamp.
    m = int(self.replicate * self.splits)
    # Never start more workers than there are models to fit.
    num_core = min(m, self.max_core)
    evaluation_collects = []

    for bt in self.backtest_timestamp:
        bt_train_TSs, bt_valid_TSs = self._filter(all_train_TSs, bt, "train")
        bt_test_train_TSs, bt_test_valid_TSs = self._filter(
            all_test_TSs, bt, "test"
        )
        bt_info[bt] = {
            "num_train_TSs": len(bt_train_TSs),
            "num_test_TSs": len(bt_test_train_TSs),
        }
        # NOTE(review): `split` is defined outside this block; it is used
        # below as a sequence of (train, valid) pairs -- confirm its contract.
        split_data = split(self.splits, self.overlap, bt_train_TSs, bt_valid_TSs)

        t0 = time.time()
        if not self.multi:
            # Sequential mode: model `i` is paired with the i-th
            # (replicate, split) combination, replicate-major.
            i = 0
            for _ in range(self.replicate):
                for train, valid in split_data:
                    info = self._fit_single_gm(
                        self.gm_collects[bt][i], train, valid
                    )
                    self.gm_info_collects[bt].append(info)
                    i += 1
            logging.info(
                f"fit {self.replicate*self.splits} gm time {time.time()-t0}"
            )
        else:
            # Pool mode: each model additionally receives a random integer
            # drawn from [1, 10000) -- presumably a per-model seed; verify
            # against `_fit_single_gm`.
            rds = np.random.randint(1, 10000, m)
            model_params = [
                (
                    self.gm_collects[bt][i],
                    split_data[i % self.splits][0],
                    split_data[i % self.splits][1],
                    rds[i],
                )
                for i in range(m)
            ]
            pool = Pool(num_core)
            try:
                results = pool.starmap(self._fit_single_gm, model_params)
            finally:
                # Release the workers even when a fit raises; otherwise the
                # pool would be leaked on the error path.
                pool.close()
                pool.join()
            self.gm_info_collects[bt] = results
            logging.info(f"fit {m} gm time {time.time()-t0}")

        bt_eval = self._evaluate(
            self.gm_collects[bt], bt_test_train_TSs, bt_test_valid_TSs
        )
        bt_eval["backtest_ts"] = bt
        evaluation_collects.append(bt_eval)
        logging.info(f"Successfully finish backtest for {bt}.")

    self.bt_info = bt_info
    return pd.concat(evaluation_collects)