# eval_single_checkpoint()
#
# from src/nanotron/eval/one_job_runner.py

    def eval_single_checkpoint(self, uploaded_files: List[dict]) -> Tuple[str, str]:
        """Run light evaluation on uploaded files.

        Locates the (single) training config among the uploaded checkpoint files,
        derives the checkpoint path from it, and launches a LightEval SLURM job
        for the current training step — unless the step is not a multiple of the
        configured eval interval.

        Args:
            uploaded_files: dicts describing uploaded checkpoint files; each must
                carry a "destination" key with the uploaded file's path.

        Returns:
            ``(slurm_job_id, slurm_log)`` for the launched evaluation job, or
            ``(None, None)`` when evaluation is skipped or no unique config file
            is found among the uploaded files.
        """
        step = self.config.general.step
        eval_interval = self.config.lighteval.eval_interval
        if eval_interval is not None and step % eval_interval != 0:
            logger.debug(
                f"Skipping evaluation at step {step} because eval_interval is {eval_interval}"
            )
            # Fix: always return a 2-tuple (was a bare `return` -> None),
            # so callers can unpack the result on every path.
            return None, None
        config_files = [
            f for f in uploaded_files if "config.py" in f["destination"] or "config.yaml" in f["destination"]
        ]
        # Sanity check on the config files len (we want only one)
        if len(config_files) == 0:
            log_rank(
                "No config files found in uploaded checkpoints. Not running evaluation.",
                logger=logger,
                level=logging.ERROR,
                group=self.parallel_context.dp_pg if self.parallel_context is not None else None,
                rank=0,
            )
            return None, None
        if len(config_files) > 1:
            log_rank(
                f"Found multiple config files in uploaded checkpoints: {config_files}",
                logger=logger,
                level=logging.ERROR,
                group=self.parallel_context.dp_pg if self.parallel_context is not None else None,
                rank=0,
            )
            return None, None
        checkpoint_path = config_files[0]["destination"].replace("config.yaml", "")
        logger.warning(
            f"Lighteval Runner got {len(uploaded_files)} files. Using {checkpoint_path} as checkpoint path."
        )
        # Second interval check against self.lighteval_config (NOTE(review): this
        # appears to be a distinct config object from self.config.lighteval —
        # confirm whether both checks are intentional). Guard against None so
        # `step % None` cannot raise a TypeError when no interval is configured.
        lighteval_interval = self.lighteval_config.eval_interval
        if lighteval_interval is not None and step % lighteval_interval != 0:
            logger.warning(
                f"Skipping evaluation at step {step} because it's not a multiple of {lighteval_interval}"
            )
            return None, None

        slurm_job_id, slurm_log = run_slurm_one_job(
            config=self.config,
            lighteval_config=self.lighteval_config,
            model_checkpoint_path=checkpoint_path,
            current_step=step,
        )
        return slurm_job_id, slurm_log