def _monitor_candidates()

in src/hyperpod_nemo_adapter/utils/callbacks/checkpoint.py [0:0]


    def _monitor_candidates(self, trainer: "pl.Trainer") -> Dict[str, Tensor]:
        """
        Build the metrics dictionary used as monitor candidates.

        A deep copy of ``trainer.callback_metrics`` is taken, so the trainer's
        own dictionary is never mutated by this method. The ``"epoch"`` and
        ``"step"`` entries are then normalised:

        * if the entry is a ``Tensor`` it is cast with ``.int()`` (a value
          logged through ``self.log("epoch", 123)`` arrives as a float);
        * otherwise (missing or not a tensor, which is likely an error) it is
          overwritten with a tensor built from ``trainer.current_epoch`` or
          ``trainer.global_step`` respectively.

        Args:
            trainer: Object exposing ``callback_metrics``, ``current_epoch``
                and ``global_step``.

        Returns:
            The copied metrics with ``"epoch"`` and ``"step"`` always present.
        """
        candidates = deepcopy(trainer.callback_metrics)

        # Normalise "epoch": keep a logged tensor (as int), else use the trainer counter.
        logged_epoch = candidates.get("epoch")
        if isinstance(logged_epoch, Tensor):
            candidates["epoch"] = logged_epoch.int()
        else:
            candidates["epoch"] = torch.tensor(trainer.current_epoch)

        # Same treatment for "step", falling back to the trainer's global step.
        logged_step = candidates.get("step")
        if isinstance(logged_step, Tensor):
            candidates["step"] = logged_step.int()
        else:
            candidates["step"] = torch.tensor(trainer.global_step)

        return candidates