in optimum/graphcore/trainer.py [0:0]
def _save(self, output_dir: Optional[str] = None, state_dict=None):
    # If we are executing this function, we are the process zero, so we don't check for that.
    output_dir = output_dir if output_dir is not None else self.args.output_dir
    os.makedirs(output_dir, exist_ok=True)
    logger.info(f"Saving model checkpoint to {output_dir}")

    # Updating self.model weights with the weights stored on device.
    # TODO: can this be deleted? It would make things faster.
    if self.training_model is not None and self.training_model.isAttachedToDevice():
        self.training_model.copyWeightsToHost()

    # Save a trained model and configuration using `save_pretrained()`.
    # They can then be reloaded using `from_pretrained()`.
    if not isinstance(self.model, (PreTrainedModel, PeftModel)):
        logger.info(
            "Trainer.model is not a `transformers.PreTrainedModel` or `peft.PeftModel`, only saving its state dict."
        )
        if state_dict is None:
            state_dict = self.model.state_dict()
        torch.save(state_dict, os.path.join(output_dir, WEIGHTS_NAME))
    else:
        # Preserve the RNG state: deparallelize/parallelize may consume random state,
        # and saving a checkpoint should not perturb training randomness.
        rng_state = torch.random.get_rng_state()
        self.model.deparallelize()
        self.model.save_pretrained(output_dir, state_dict=state_dict)
        self.model.parallelize(**self.model.ipu_config.parallelize_kwargs)
        torch.random.set_rng_state(rng_state)
    if self.tokenizer is not None:
        self.tokenizer.save_pretrained(output_dir)

    self.ipu_config.save_pretrained(output_dir)

    # Good practice: save your training arguments together with the trained model.
    torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))
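
A minimal reload sketch (not part of trainer.py): it assumes the checkpoint directory written by `_save` above when the `PreTrainedModel` branch ran (i.e. `save_pretrained()` was used), and relies only on the standard `from_pretrained()` entry points of transformers and optimum-graphcore. The directory path is hypothetical.

from transformers import AutoModel, AutoTokenizer
from optimum.graphcore import IPUConfig

checkpoint_dir = "./outputs/checkpoint"  # hypothetical path; use your own output_dir
model = AutoModel.from_pretrained(checkpoint_dir)          # weights + config written by save_pretrained()
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)  # only present if the trainer had a tokenizer
ipu_config = IPUConfig.from_pretrained(checkpoint_dir)     # IPU execution settings saved above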