in optimum/graphcore/training_args.py [0:0]
def __post_init__(self):
# convert to int
self.log_level = trainer_log_levels[self.log_level]
# self.log_level_replica = trainer_log_levels[self.log_level_replica]
# expand paths, if not os.makedirs("~/bar") will make directory
# in the current directory instead of the actual home
# see https://github.com/huggingface/transformers/issues/10628
if self.output_dir is not None:
self.output_dir = os.path.expanduser(self.output_dir)
if self.logging_dir is None and self.output_dir is not None:
self.logging_dir = os.path.join(self.output_dir, default_logdir())
if self.logging_dir is not None:
self.logging_dir = os.path.expanduser(self.logging_dir)
if self.disable_tqdm is None:
self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN
if isinstance(self.evaluation_strategy, EvaluationStrategy):
warnings.warn(
"Using `EvaluationStrategy` for `evaluation_strategy` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `IntervalStrategy` instead",
FutureWarning,
)
# Go back to the underlying string or we won't be able to instantiate `IntervalStrategy` on it.
self.evaluation_strategy = self.evaluation_strategy.value
self.evaluation_strategy = IntervalStrategy(self.evaluation_strategy)
self.logging_strategy = IntervalStrategy(self.logging_strategy)
self.save_strategy = IntervalStrategy(self.save_strategy)
self.hub_strategy = HubStrategy(self.hub_strategy)
self.lr_scheduler_type = SchedulerType(self.lr_scheduler_type)
if self.do_eval is False and self.evaluation_strategy != IntervalStrategy.NO:
self.do_eval = True
# eval_steps has to be defined and non-zero, fallbacks to logging_steps if the latter is non-zero
if self.evaluation_strategy == IntervalStrategy.STEPS and (self.eval_steps is None or self.eval_steps == 0):
if self.logging_steps > 0:
logger.info(f"Using `logging_steps` to initialize `eval_steps` to {self.logging_steps}")
self.eval_steps = self.logging_steps
else:
raise ValueError(
f"Evaluation strategy {self.evaluation_strategy} requires either non-zero --eval_steps or --logging_steps"
)
# logging_steps must be non-zero for logging_strategy that is other than 'no'
if self.logging_strategy == IntervalStrategy.STEPS and self.logging_steps == 0:
raise ValueError(f"Logging strategy {self.logging_strategy} requires non-zero --logging_steps")
# Sanity checks for load_best_model_at_end: we require save and eval strategies to be compatible.
if self.load_best_model_at_end:
if self.evaluation_strategy != self.save_strategy:
raise ValueError(
"--load_best_model_at_end requires the save and eval strategy to match, but found\n- Evaluation "
f"strategy: {self.evaluation_strategy}\n- Save strategy: {self.save_strategy}"
)
if self.evaluation_strategy == IntervalStrategy.STEPS and self.save_steps % self.eval_steps != 0:
raise ValueError(
"--load_best_model_at_end requires the saving steps to be a round multiple of the evaluation "
f"steps, but found {self.save_steps}, which is not a round multiple of {self.eval_steps}."
)
if self.load_best_model_at_end and self.metric_for_best_model is None:
self.metric_for_best_model = "loss"
if self.greater_is_better is None and self.metric_for_best_model is not None:
self.greater_is_better = self.metric_for_best_model not in ["loss", "eval_loss"]
if self.run_name is None:
self.run_name = self.output_dir
if self.report_to == "all" or self.report_to == ["all"]:
# Import at runtime to avoid a circular import.
from transformers.integrations import get_available_reporting_integrations
self.report_to = get_available_reporting_integrations()
elif self.report_to is None or self.report_to == "none" or self.report_to == ["none"]:
self.report_to = []
elif not isinstance(self.report_to, list):
self.report_to = [self.report_to]
if self.warmup_ratio < 0 or self.warmup_ratio > 1:
raise ValueError("warmup_ratio must lie in range [0,1]")
elif self.warmup_ratio > 0 and self.warmup_steps > 0:
logger.info(
"Both warmup_ratio and warmup_steps given, warmup_steps will override any effect of warmup_ratio during training."
)
if isinstance(self.debug, str):
self.debug = [DebugOption(s) for s in self.debug.split()]
if self.push_to_hub_token is not None:
warnings.warn(
"`--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use "
"`--hub_token` instead.",
FutureWarning,
)
self.hub_token = self.push_to_hub_token
if self.push_to_hub_model_id is not None:
self.hub_model_id = get_full_repo_name(
self.push_to_hub_model_id, organization=self.push_to_hub_organization, token=self.hub_token
)
if self.push_to_hub_organization is not None:
warnings.warn(
"`--push_to_hub_model_id` and `--push_to_hub_organization` are deprecated and will be removed in "
"version 5 of 🤗 Transformers. Use `--hub_model_id` instead and pass the full repo name to this "
f"argument (in this case {self.hub_model_id}).",
FutureWarning,
)
else:
warnings.warn(
"`--push_to_hub_model_id` is deprecated and will be removed in version 5 of 🤗 Transformers. Use "
"`--hub_model_id` instead and pass the full repo name to this argument (in this case "
f"{self.hub_model_id}).",
FutureWarning,
)
elif self.push_to_hub_organization is not None:
self.hub_model_id = f"{self.push_to_hub_organization}/{Path(self.output_dir).name}"
warnings.warn(
"`--push_to_hub_organization` is deprecated and will be removed in version 5 of 🤗 Transformers. Use "
"`--hub_model_id` instead and pass the full repo name to this argument (in this case "
f"{self.hub_model_id}).",
FutureWarning,
)
# IPU specific
dataloader_mode_mapping = {"sync": 0, "async": 1, "async_rebatched": 2}
self.dataloader_mode = DataLoaderMode(dataloader_mode_mapping[self.dataloader_mode])
override_str = []
if self.gradient_accumulation_steps is not None:
override_str.append(f"gradient_accumulation_steps={self.gradient_accumulation_steps}")
else:
self.gradient_accumulation_steps = 1
if self.auto_loss_scaling:
override_str.append(f"auto_loss_scaling={self.auto_loss_scaling}")
if self.gradient_checkpointing:
override_str.append("recompute_checkpoint_every_layer=True")
if override_str:
override_str = ",".join(override_str)
if self.ipu_config_overrides is None:
self.ipu_config_overrides = override_str
else:
self.ipu_config_overrides = ",".join([self.ipu_config_overrides, override_str])