in src/sagemaker_xgboost_container/checkpointing.py [0:0]
def after_iteration(self, model, epoch=0, evals_log=None) -> bool:
# rank: master node has rank 0.
# iteration: current boosting round
# end_iteration: round # when training will end. this is always num_round + 1.
# model: model object
if self.rank != 0:
logger.debug("Not master (rank = %d). Exiting checkpoint callback.", self.rank)
return
if len(os.listdir(self.checkpoint_dir)) != 0:
xgb_model, self.iteration = load_checkpoint(self.checkpoint_dir)
current_iteration = self.iteration
else:
current_iteration = self.start_iteration + self.iteration
self._save_checkpoint(model, current_iteration)
# For example, if we are at iteration 5 and max_to_keep is 5, we no
# longer need checkpoint from iteration 0 (i.e., xgboost-checkpoint.0),
# so we put iteration_to_delete = 0 on the queue.
iteration_to_delete = current_iteration - self.max_to_keep
self.delete_queue.put(iteration_to_delete)
offset_iteration = self.end_iteration if self.num_round is None else self.num_round
training_has_ended = current_iteration + 1 >= self.start_iteration + offset_iteration
if training_has_ended:
self.stop()
return False