in src/hyperpod_nemo_adapter/collections/parts/sagemaker_trainer_builder.py [0:0]
def create_trainer(self, callbacks=None) -> tuple:
    """Build the ``Trainer`` and its data module from ``self.cfg.trainer``.

    Args:
        callbacks: Optional value forwarded to ``self._create_callbacks``;
            the trainer receives whatever that helper returns.

    Returns:
        A ``(trainer, data_module)`` pair. ``trainer`` is the newly
        constructed ``Trainer``; ``data_module`` is the result of
        ``self._create_data_module(trainer)``.
    """
    strategy = self._training_strategy()
    plugins = self._create_plugins()
    callbacks = self._create_callbacks(callbacks)

    trainer = Trainer(
        strategy=strategy,
        max_steps=self.cfg.trainer.max_steps,
        # Logger will be configured in exp_manager; set to False here to
        # prevent a conflict.
        logger=False,
        plugins=plugins,
        callbacks=callbacks,
        log_every_n_steps=self.cfg.trainer.log_every_n_steps,
        # Disable the default Lightning ModelCheckpoint unless generic or
        # resilience checkpointing is in use.
        enable_checkpointing=self.use_generic_checkpoint or self.use_resilience_checkpoint,
        val_check_interval=self.cfg.trainer.val_check_interval,
        limit_val_batches=self.cfg.trainer.limit_val_batches,
        # Fall back to "auto" when the config does not specify devices.
        devices=self.cfg.trainer.get("devices", "auto"),
    )

    # The data module is created after the trainer because the helper takes
    # the trainer instance as its argument.
    data_module = self._create_data_module(trainer)
    return trainer, data_module