in activemri/baselines/ddqn.py [0:0]
def __call__(self):
    """Poll for new policy checkpoints and evaluate them until training ends.

    Each pass checks whether training has finished, then looks for a policy
    checkpoint newer than the last one evaluated. If none exists, the loop
    backs off for 10 minutes. New checkpoints are scored on the validation
    split; whenever a checkpoint beats the best score seen so far, the
    tester state and the policy (as ``policy_best.pt`` in the evaluation
    directory) are saved.
    """
    done = False
    while not done:
        done = self.check_if_train_done()
        self.logger.info(f"Is training done? {done}.")
        episode, ts = self.load_latest_policy()
        is_new = ts is not None and ts > self.last_time_stamp
        if not is_new:
            # Nothing newer than what was already evaluated; back off.
            self.logger.info(
                "No new policy to evaluate. "
                "I will wait for 10 minutes before trying again."
            )
            time.sleep(600)
            continue
        self.logger.info(
            f"Found a new checkpoint with timestamp {ts}, "
            f"I will start evaluation now."
        )
        scores, _ = evaluation.evaluate(
            self.env,
            self.policy,
            self.options.num_test_episodes,
            self.options.seed,
            "val",
            verbose=True,
        )
        # Area under the per-episode reward curve, averaged over episodes.
        score = scores[self.options.reward_metric].sum(axis=1).mean()
        if "mse" in self.options.reward_metric:
            # MSE-like metrics are lower-is-better; flip the sign so a
            # higher score always means a better policy.
            score = -score
        self.logger.info(f"The test score for the model was {score}.")
        self.last_time_stamp = ts
        if score > self.best_test_score:
            # NOTE(review): tester checkpoint is written before
            # best_test_score is updated — confirm save_tester_checkpoint
            # does not serialize the (still stale) best score.
            self.save_tester_checkpoint()
            best_path = os.path.join(self.evaluation_dir, "policy_best.pt")
            self.save_policy(best_path, episode)
            self.best_test_score = score
            self.logger.info(
                f"Saved DQN model with score {self.best_test_score} to {best_path}, "
                f"corresponding to episode {episode}."
            )