in train_dense_encoder.py [0:0]
def validate_and_save(self, epoch: int, iteration: int, scheduler):
    """Run validation for the current state and save a checkpoint.

    The validation metric is selected by epoch: ``self.validate_nll()`` is
    used before ``cfg.val_av_rank_start_epoch`` and
    ``self.validate_average_rank()`` from that epoch on. When
    ``cfg.dev_datasets`` is empty no validation is run and the metric is
    fixed at 0. For either metric a lower value is treated as better.

    Validation runs on every process, but only the process whose
    ``cfg.local_rank`` is -1 or 0 saves the checkpoint and updates
    ``self.best_validation_result`` / ``self.best_cp_name``.

    Args:
        epoch: Current epoch; selects the validation metric and is passed
            to ``self._save_checkpoint``.
        iteration: Passed through to ``self._save_checkpoint``.
        scheduler: Passed through to ``self._save_checkpoint``.
    """
    cfg = self.cfg
    # for distributed mode, save checkpoint for only one process
    save_cp = cfg.local_rank in [-1, 0]

    # The metric changes at this epoch, so the previously recorded best is
    # dropped (presumably because the two metrics are not comparable).
    if epoch == cfg.val_av_rank_start_epoch:
        self.best_validation_result = None

    has_dev_data = bool(cfg.dev_datasets)
    if not has_dev_data:
        validation_loss = 0
    elif epoch >= cfg.val_av_rank_start_epoch:
        validation_loss = self.validate_average_rank()
    else:
        validation_loss = self.validate_nll()

    if not save_cp:
        return

    cp_name = self._save_checkpoint(scheduler, epoch, iteration)
    logger.info("Saved checkpoint to %s", cp_name)

    # Compare against None explicitly: a recorded best of 0 is a valid
    # (perfect) score and must not be mistaken for "no best yet", otherwise
    # a later, worse result would replace it.
    # Without dev data the metric is constant, so every saved checkpoint is
    # recorded as the best one (i.e. the latest checkpoint wins).
    best_so_far = self.best_validation_result
    is_new_best = (
        not has_dev_data
        or best_so_far is None
        or validation_loss < best_so_far
    )
    if is_new_best:
        self.best_validation_result = validation_loss
        self.best_cp_name = cp_name
        logger.info("New Best validation checkpoint %s", cp_name)