in src/hyperpod_nemo_adapter/utils/callbacks/checkpoint.py
def _monitor_candidates(self, trainer: "pl.Trainer") -> Dict[str, Tensor]:
    """
    Retrieve the callback_metrics from trainer.
    """
    monitor_candidates = deepcopy(trainer.callback_metrics)
    # Cast to int if necessary because `self.log("epoch", 123)` will convert it to float.
    # If it's not a tensor or does not exist, we overwrite it, as it's likely an error.
    epoch = monitor_candidates.get("epoch")
    monitor_candidates["epoch"] = epoch.int() if isinstance(epoch, Tensor) else torch.tensor(trainer.current_epoch)
    step = monitor_candidates.get("step")
    monitor_candidates["step"] = step.int() if isinstance(step, Tensor) else torch.tensor(trainer.global_step)
    return monitor_candidates
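
For context, below is a minimal, self-contained sketch (not part of checkpoint.py; the dict contents and the fallback value are invented for illustration) of the situation the int cast guards against: Lightning stores logged scalars as float tensors, so an "epoch" or "step" value logged via self.log would otherwise reach checkpoint filename formatting as a float.

import torch

# Hypothetical callback_metrics, mimicking what trainer.callback_metrics might
# hold after `self.log("epoch", 123)`: the scalar is kept as a float tensor.
callback_metrics = {"epoch": torch.tensor(123.0), "val_loss": torch.tensor(0.42)}

# Same pattern as _monitor_candidates: cast to int when the value is a tensor,
# otherwise fall back to a default (0 stands in for trainer.current_epoch here).
epoch = callback_metrics.get("epoch")
callback_metrics["epoch"] = epoch.int() if isinstance(epoch, torch.Tensor) else torch.tensor(0)

print(callback_metrics["epoch"])  # tensor(123, dtype=torch.int32)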